rgrove-sanitize 1.0.8.3 → 1.0.8.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/HISTORY +3 -1
- data/lib/sanitize.rb +19 -25
- data/lib/sanitize/config.rb +10 -6
- data/lib/sanitize/version.rb +1 -1
- metadata +4 -3
data/HISTORY
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
Sanitize History
|
2
2
|
================================================================================
|
3
3
|
|
4
|
-
Version 1.0.8.
|
4
|
+
Version 1.0.8.4 (git)
|
5
5
|
* Migrated from Hpricot to Nokogiri. Requires libxml2 >= 2.7.2 [Adam Hooper]
|
6
|
+
* Added an :output config setting to allow the output format to be specified.
|
7
|
+
Supported formats are :xhtml (the default) and :html (which outputs HTML4).
|
6
8
|
* Changed protocol regex to ensure Sanitize doesn't kill URLs with colons in
|
7
9
|
path segments. [Peter Cooper]
|
8
10
|
|
data/lib/sanitize.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# encoding: utf-8
|
1
2
|
#--
|
2
3
|
# Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
|
3
4
|
#
|
@@ -29,18 +30,6 @@ require 'sanitize/config/relaxed'
|
|
29
30
|
|
30
31
|
class Sanitize
|
31
32
|
|
32
|
-
# Characters that should be replaced with entities in text nodes.
|
33
|
-
ENTITY_MAP = {
|
34
|
-
'<' => '&lt;',
|
35
|
-
'>' => '&gt;',
|
36
|
-
'"' => '&quot;',
|
37
|
-
"'" => '&#39;'
|
38
|
-
}
|
39
|
-
|
40
|
-
# Matches an unencoded ampersand that is not part of a valid character entity
|
41
|
-
# reference.
|
42
|
-
REGEX_AMPERSAND = /&(?!(?:[a-z]+[0-9]{0,2}|#[0-9]+|#x[0-9a-f]+);)/i
|
43
|
-
|
44
33
|
# Matches an attribute value that could be treated by a browser as a URL
|
45
34
|
# with a protocol prefix, such as "http:" or "javascript:". Any string of zero
|
46
35
|
# or more characters followed by a colon is considered a match, even if the
|
@@ -124,7 +113,24 @@ class Sanitize
|
|
124
113
|
end
|
125
114
|
end
|
126
115
|
|
127
|
-
|
116
|
+
if @config[:output] == :xhtml
|
117
|
+
output_method = fragment.method(:to_xhtml)
|
118
|
+
elsif @config[:output] == :html
|
119
|
+
output_method = fragment.method(:to_html)
|
120
|
+
else
|
121
|
+
raise Error, "unsupported output format: #{@config[:output]}"
|
122
|
+
end
|
123
|
+
|
124
|
+
if RUBY_VERSION >= '1.9'
|
125
|
+
# Nokogiri 1.3.3 (and possibly earlier versions) always returns a US-ASCII
|
126
|
+
# string no matter what we ask for. This will be fixed in 1.4.0, but for
|
127
|
+
# now we have to hack around it to prevent errors.
|
128
|
+
result = output_method.call(:encoding => 'utf-8', :indent => 0).force_encoding('utf-8')
|
129
|
+
result.gsub!(">\n", '>')
|
130
|
+
else
|
131
|
+
result = output_method.call(:encoding => 'utf-8', :indent => 0).gsub(">\n", '>')
|
132
|
+
end
|
133
|
+
|
128
134
|
return result == html ? nil : html[0, html.length] = result
|
129
135
|
end
|
130
136
|
|
@@ -146,18 +152,6 @@ class Sanitize
|
|
146
152
|
sanitize = Sanitize.new(config)
|
147
153
|
sanitize.clean!(html)
|
148
154
|
end
|
149
|
-
|
150
|
-
# Encodes special HTML characters (<, >, ", ', and &) in _html_ as entity
|
151
|
-
# references and returns the encoded string.
|
152
|
-
def encode_html(html)
|
153
|
-
str = html.dup
|
154
|
-
|
155
|
-
# Encode special chars.
|
156
|
-
ENTITY_MAP.each {|char, entity| str.gsub!(char, entity) }
|
157
|
-
|
158
|
-
# Convert unencoded ampersands to entity references.
|
159
|
-
str.gsub(REGEX_AMPERSAND, '&amp;')
|
160
|
-
end
|
161
155
|
end
|
162
156
|
|
163
157
|
end
|
data/lib/sanitize/config.rb
CHANGED
@@ -28,17 +28,21 @@ class Sanitize
|
|
28
28
|
# comments.
|
29
29
|
:allow_comments => false,
|
30
30
|
|
31
|
-
# HTML
|
32
|
-
#
|
33
|
-
:
|
31
|
+
# HTML attributes to add to specific elements. By default, no attributes
|
32
|
+
# are added.
|
33
|
+
:add_attributes => {},
|
34
34
|
|
35
35
|
# HTML attributes to allow in specific elements. By default, no attributes
|
36
36
|
# are allowed.
|
37
37
|
:attributes => {},
|
38
38
|
|
39
|
-
# HTML
|
40
|
-
#
|
41
|
-
:
|
39
|
+
# HTML elements to allow. By default, no elements are allowed (which means
|
40
|
+
# that all HTML will be stripped).
|
41
|
+
:elements => [],
|
42
|
+
|
43
|
+
# Output format. Supported formats are :html and :xhtml (which is the
|
44
|
+
# default).
|
45
|
+
:output => :xhtml,
|
42
46
|
|
43
47
|
# URL handling protocols to allow in specific attributes. By default, no
|
44
48
|
# protocols are allowed. Use :relative in place of a protocol if you want
|
data/lib/sanitize/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rgrove-sanitize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.8.3
|
4
|
+
version: 1.0.8.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Grove
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-09-
|
12
|
+
date: 2009-09-17 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -62,6 +62,7 @@ files:
|
|
62
62
|
- lib/sanitize.rb
|
63
63
|
has_rdoc: false
|
64
64
|
homepage: http://github.com/rgrove/sanitize/
|
65
|
+
licenses:
|
65
66
|
post_install_message:
|
66
67
|
rdoc_options: []
|
67
68
|
|
@@ -82,7 +83,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
82
83
|
requirements: []
|
83
84
|
|
84
85
|
rubyforge_project:
|
85
|
-
rubygems_version: 1.
|
86
|
+
rubygems_version: 1.3.5
|
86
87
|
signing_key:
|
87
88
|
specification_version: 3
|
88
89
|
summary: Whitelist-based HTML sanitizer.
|