rgrove-sanitize 1.0.8.3 → 1.0.8.4
Sign up to get free protection for your applications and to get access to all the features.
- data/HISTORY +3 -1
- data/lib/sanitize.rb +19 -25
- data/lib/sanitize/config.rb +10 -6
- data/lib/sanitize/version.rb +1 -1
- metadata +4 -3
data/HISTORY
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
Sanitize History
|
2
2
|
================================================================================
|
3
3
|
|
4
|
-
Version 1.0.8.3
|
4
|
+
Version 1.0.8.4 (git)
|
5
5
|
* Migrated from Hpricot to Nokogiri. Requires libxml2 >= 2.7.2 [Adam Hooper]
|
6
|
+
* Added an :output config setting to allow the output format to be specified.
|
7
|
+
Supported formats are :xhtml (the default) and :html (which outputs HTML4).
|
6
8
|
* Changed protocol regex to ensure Sanitize doesn't kill URLs with colons in
|
7
9
|
path segments. [Peter Cooper]
|
8
10
|
|
data/lib/sanitize.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# encoding: utf-8
|
1
2
|
#--
|
2
3
|
# Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
|
3
4
|
#
|
@@ -29,18 +30,6 @@ require 'sanitize/config/relaxed'
|
|
29
30
|
|
30
31
|
class Sanitize
|
31
32
|
|
32
|
-
# Characters that should be replaced with entities in text nodes.
|
33
|
-
ENTITY_MAP = {
|
34
|
-
'<' => '&lt;',
|
35
|
-
'>' => '&gt;',
|
36
|
-
'"' => '&quot;',
|
37
|
-
"'" => '&#39;'
|
38
|
-
}
|
39
|
-
|
40
|
-
# Matches an unencoded ampersand that is not part of a valid character entity
|
41
|
-
# reference.
|
42
|
-
REGEX_AMPERSAND = /&(?!(?:[a-z]+[0-9]{0,2}|#[0-9]+|#x[0-9a-f]+);)/i
|
43
|
-
|
44
33
|
# Matches an attribute value that could be treated by a browser as a URL
|
45
34
|
# with a protocol prefix, such as "http:" or "javascript:". Any string of zero
|
46
35
|
# or more characters followed by a colon is considered a match, even if the
|
@@ -124,7 +113,24 @@ class Sanitize
|
|
124
113
|
end
|
125
114
|
end
|
126
115
|
|
127
|
-
|
116
|
+
if @config[:output] == :xhtml
|
117
|
+
output_method = fragment.method(:to_xhtml)
|
118
|
+
elsif @config[:output] == :html
|
119
|
+
output_method = fragment.method(:to_html)
|
120
|
+
else
|
121
|
+
raise Error, "unsupported output format: #{@config[:output]}"
|
122
|
+
end
|
123
|
+
|
124
|
+
if RUBY_VERSION >= '1.9'
|
125
|
+
# Nokogiri 1.3.3 (and possibly earlier versions) always returns a US-ASCII
|
126
|
+
# string no matter what we ask for. This will be fixed in 1.4.0, but for
|
127
|
+
# now we have to hack around it to prevent errors.
|
128
|
+
result = output_method.call(:encoding => 'utf-8', :indent => 0).force_encoding('utf-8')
|
129
|
+
result.gsub!(">\n", '>')
|
130
|
+
else
|
131
|
+
result = output_method.call(:encoding => 'utf-8', :indent => 0).gsub(">\n", '>')
|
132
|
+
end
|
133
|
+
|
128
134
|
return result == html ? nil : html[0, html.length] = result
|
129
135
|
end
|
130
136
|
|
@@ -146,18 +152,6 @@ class Sanitize
|
|
146
152
|
sanitize = Sanitize.new(config)
|
147
153
|
sanitize.clean!(html)
|
148
154
|
end
|
149
|
-
|
150
|
-
# Encodes special HTML characters (<, >, ", ', and &) in _html_ as entity
|
151
|
-
# references and returns the encoded string.
|
152
|
-
def encode_html(html)
|
153
|
-
str = html.dup
|
154
|
-
|
155
|
-
# Encode special chars.
|
156
|
-
ENTITY_MAP.each {|char, entity| str.gsub!(char, entity) }
|
157
|
-
|
158
|
-
# Convert unencoded ampersands to entity references.
|
159
|
-
str.gsub(REGEX_AMPERSAND, '&amp;')
|
160
|
-
end
|
161
155
|
end
|
162
156
|
|
163
157
|
end
|
data/lib/sanitize/config.rb
CHANGED
@@ -28,17 +28,21 @@ class Sanitize
|
|
28
28
|
# comments.
|
29
29
|
:allow_comments => false,
|
30
30
|
|
31
|
-
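# HTML elements to allow. By default, no elements are allowed (which means
|
32
|
-
# that all HTML will be stripped).
|
33
|
-
:elements => [],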
|
31
|
+
# HTML attributes to add to specific elements. By default, no attributes
|
32
|
+
# are added.
|
33
|
+
:add_attributes => {},
|
34
34
|
|
35
35
|
# HTML attributes to allow in specific elements. By default, no attributes
|
36
36
|
# are allowed.
|
37
37
|
:attributes => {},
|
38
38
|
|
39
|
-
# HTML attributes to add to specific elements. By default, no attributes
|
40
|
-
# are added.
|
41
|
-
:add_attributes => {},
|
39
|
+
# HTML elements to allow. By default, no elements are allowed (which means
|
40
|
+
# that all HTML will be stripped).
|
41
|
+
:elements => [],
|
42
|
+
|
43
|
+
# Output format. Supported formats are :html and :xhtml (which is the
|
44
|
+
# default).
|
45
|
+
:output => :xhtml,
|
42
46
|
|
43
47
|
# URL handling protocols to allow in specific attributes. By default, no
|
44
48
|
# protocols are allowed. Use :relative in place of a protocol if you want
|
data/lib/sanitize/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rgrove-sanitize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.8.3
|
4
|
+
version: 1.0.8.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Grove
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-09-
|
12
|
+
date: 2009-09-17 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -62,6 +62,7 @@ files:
|
|
62
62
|
- lib/sanitize.rb
|
63
63
|
has_rdoc: false
|
64
64
|
homepage: http://github.com/rgrove/sanitize/
|
65
|
+
licenses:
|
65
66
|
post_install_message:
|
66
67
|
rdoc_options: []
|
67
68
|
|
@@ -82,7 +83,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
82
83
|
requirements: []
|
83
84
|
|
84
85
|
rubyforge_project:
|
85
|
-
rubygems_version: 1.
|
86
|
+
rubygems_version: 1.3.5
|
86
87
|
signing_key:
|
87
88
|
specification_version: 3
|
88
89
|
summary: Whitelist-based HTML sanitizer.
|