sanitize 1.2.2.dev.20101118 → 1.3.0.dev.20101210
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of sanitize might be problematic. Click here for more details.
- data/HISTORY +14 -1
- data/README.rdoc +16 -3
- data/lib/sanitize/config/basic.rb +9 -7
- data/lib/sanitize/config/relaxed.rb +17 -13
- data/lib/sanitize/config/restricted.rb +1 -1
- data/lib/sanitize/config.rb +11 -4
- data/lib/sanitize/version.rb +1 -1
- data/lib/sanitize.rb +12 -2
- metadata +10 -10
data/HISTORY
CHANGED
@@ -1,7 +1,20 @@
|
|
1
1
|
Sanitize History
|
2
2
|
================================================================================
|
3
3
|
|
4
|
-
Version 1.2.2 (git)
|
4
|
+
Version 1.3.0 (git)
|
5
|
+
* The default value for the :output config is now :html. Previously it was
|
6
|
+
:xhtml.
|
7
|
+
* Added a :whitespace_elements config, which specifies elements (such as <br>
|
8
|
+
and <p>) that should be replaced with whitespace when removed in order to
|
9
|
+
preserve readability. See the README for the default list of elements that
|
10
|
+
will be replaced with whitespace when removed.
|
11
|
+
* Added the `abbr`, `dfn`, `kbd`, `mark`, `s`, `samp`, `time`, and `var`
|
12
|
+
elements to the whitelists for `Sanitize::Config::BASIC` and
|
13
|
+
`Sanitize::Config::RELAXED`.
|
14
|
+
* Added the `bdo`, `del`, `figcaption`, `figure`, `hgroup`, `ins`, `rp`, `rt`,
|
15
|
+
`ruby`, and `wbr` elements to the whitelist for `Sanitize::Config::RELAXED`.
|
16
|
+
* The `dir`, `lang`, and `title` attributes are now whitelisted for all
|
17
|
+
elements in `Sanitize::Config::RELAXED`.
|
5
18
|
* The environment hash passed into transformers now includes an
|
6
19
|
:allowed_elements Hash to facilitate faster lookups when attempting to
|
7
20
|
determine whether an element is in the whitelist. [Suggested by Nicholas
|
data/README.rdoc
CHANGED
@@ -14,14 +14,14 @@ of fragile regular expressions, Sanitize has no trouble dealing with malformed
|
|
14
14
|
or maliciously-formed HTML, and will always output valid HTML or XHTML.
|
15
15
|
|
16
16
|
*Author*:: Ryan Grove (mailto:ryan@wonko.com)
|
17
|
-
*Version*:: 1.2.2 (git)
|
17
|
+
*Version*:: 1.3.0 (git)
|
18
18
|
*Copyright*:: Copyright (c) 2010 Ryan Grove. All rights reserved.
|
19
19
|
*License*:: MIT License (http://opensource.org/licenses/mit-license.php)
|
20
20
|
*Website*:: http://github.com/rgrove/sanitize
|
21
21
|
|
22
22
|
== Requires
|
23
23
|
|
24
|
-
* Nokogiri ~> 1.4.
|
24
|
+
* Nokogiri ~> 1.4.4
|
25
25
|
* libxml2 >= 2.7.2
|
26
26
|
|
27
27
|
== Installation
|
@@ -136,7 +136,7 @@ Array of element names to allow. Specify all names in lowercase.
|
|
136
136
|
==== :output (Symbol)
|
137
137
|
|
138
138
|
Output format. Supported formats are <code>:html</code> and <code>:xhtml</code>,
|
139
|
-
defaulting to <code>:xhtml</code>.
|
139
|
+
defaulting to <code>:html</code>.
|
140
140
|
|
141
141
|
==== :output_encoding (String)
|
142
142
|
|
@@ -181,6 +181,19 @@ The default value is <code>false</code>.
|
|
181
181
|
|
182
182
|
See below.
|
183
183
|
|
184
|
+
==== :whitespace_elements (Array)
|
185
|
+
|
186
|
+
Array of lowercase element names that should be replaced with whitespace when
|
187
|
+
removed in order to preserve readability. For example,
|
188
|
+
<code>foo<div>bar</div>baz</code> will become
|
189
|
+
<code>foo bar baz</code> when the <code><div></code> is removed.
|
190
|
+
|
191
|
+
By default, the following elements are included in the
|
192
|
+
<code>:whitespace_elements</code> array:
|
193
|
+
|
194
|
+
address article aside blockquote br dd div dl dt footer h1 h2 h3 h4 h5
|
195
|
+
h6 header hgroup hr li nav ol p pre section ul
|
196
|
+
|
184
197
|
=== Transformers
|
185
198
|
|
186
199
|
Transformers allow you to filter and alter nodes using your own custom logic, on
|
@@ -23,15 +23,18 @@
|
|
23
23
|
class Sanitize
|
24
24
|
module Config
|
25
25
|
BASIC = {
|
26
|
-
:elements => [
|
27
|
-
|
28
|
-
|
29
|
-
|
26
|
+
:elements => %w[
|
27
|
+
a abbr b blockquote br cite code dd dfn dl dt em i kbd li mark ol p pre
|
28
|
+
q s samp small strike strong sub sup time u ul var
|
29
|
+
],
|
30
30
|
|
31
31
|
:attributes => {
|
32
32
|
'a' => ['href'],
|
33
|
+
'abbr' => ['title'],
|
33
34
|
'blockquote' => ['cite'],
|
34
|
-
'
|
35
|
+
'dfn' => ['title'],
|
36
|
+
'q' => ['cite'],
|
37
|
+
'time' => ['datetime', 'pubdate']
|
35
38
|
},
|
36
39
|
|
37
40
|
:add_attributes => {
|
@@ -39,8 +42,7 @@ class Sanitize
|
|
39
42
|
},
|
40
43
|
|
41
44
|
:protocols => {
|
42
|
-
'a' => {'href' => ['ftp', 'http', 'https', 'mailto',
|
43
|
-
:relative]},
|
45
|
+
'a' => {'href' => ['ftp', 'http', 'https', 'mailto', :relative]},
|
44
46
|
'blockquote' => {'cite' => ['http', 'https', :relative]},
|
45
47
|
'q' => {'cite' => ['http', 'https', :relative]}
|
46
48
|
}
|
@@ -23,33 +23,37 @@
|
|
23
23
|
class Sanitize
|
24
24
|
module Config
|
25
25
|
RELAXED = {
|
26
|
-
:elements => [
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
26
|
+
:elements => %w[
|
27
|
+
a abbr b bdo blockquote br caption cite code col colgroup dd del dfn dl
|
28
|
+
dt em figcaption figure h1 h2 h3 h4 h5 h6 hgroup i img ins kbd li mark
|
29
|
+
ol p pre q rp rt ruby s samp small strike strong sub sup table tbody td
|
30
|
+
tfoot th thead time tr u ul var wbr
|
31
|
+
],
|
32
32
|
|
33
33
|
:attributes => {
|
34
|
-
|
34
|
+
:all => ['dir', 'lang', 'title'],
|
35
|
+
'a' => ['href'],
|
35
36
|
'blockquote' => ['cite'],
|
36
37
|
'col' => ['span', 'width'],
|
37
38
|
'colgroup' => ['span', 'width'],
|
38
|
-
'
|
39
|
-
'
|
39
|
+
'del' => ['cite', 'datetime'],
|
40
|
+
'img' => ['align', 'alt', 'height', 'src', 'width'],
|
41
|
+
'ins' => ['cite', 'datetime'],
|
42
|
+
'ol' => ['start', 'reversed', 'type'],
|
40
43
|
'q' => ['cite'],
|
41
44
|
'table' => ['summary', 'width'],
|
42
45
|
'td' => ['abbr', 'axis', 'colspan', 'rowspan', 'width'],
|
43
|
-
'th' => ['abbr', 'axis', 'colspan', 'rowspan', 'scope',
|
44
|
-
|
46
|
+
'th' => ['abbr', 'axis', 'colspan', 'rowspan', 'scope', 'width'],
|
47
|
+
'time' => ['datetime', 'pubdate'],
|
45
48
|
'ul' => ['type']
|
46
49
|
},
|
47
50
|
|
48
51
|
:protocols => {
|
49
|
-
'a' => {'href' => ['ftp', 'http', 'https', 'mailto',
|
50
|
-
:relative]},
|
52
|
+
'a' => {'href' => ['ftp', 'http', 'https', 'mailto', :relative]},
|
51
53
|
'blockquote' => {'cite' => ['http', 'https', :relative]},
|
54
|
+
'del' => {'cite' => ['http', 'https', :relative]},
|
52
55
|
'img' => {'src' => ['http', 'https', :relative]},
|
56
|
+
'ins' => {'cite' => ['http', 'https', :relative]},
|
53
57
|
'q' => {'cite' => ['http', 'https', :relative]}
|
54
58
|
}
|
55
59
|
}
|
data/lib/sanitize/config.rb
CHANGED
@@ -41,9 +41,8 @@ class Sanitize
|
|
41
41
|
# that all HTML will be stripped).
|
42
42
|
:elements => [],
|
43
43
|
|
44
|
-
# Output format. Supported formats are :html and :xhtml
|
45
|
-
|
46
|
-
:output => :xhtml,
|
44
|
+
# Output format. Supported formats are :html and :xhtml. Default is :html.
|
45
|
+
:output => :html,
|
47
46
|
|
48
47
|
# Character encoding to use for HTML output. Default is 'utf-8'.
|
49
48
|
:output_encoding => 'utf-8',
|
@@ -69,8 +68,16 @@ class Sanitize
|
|
69
68
|
|
70
69
|
# Transformers allow you to filter or alter nodes using custom logic. See
|
71
70
|
# README.rdoc for details and examples.
|
72
|
-
:transformers => []
|
71
|
+
:transformers => [],
|
73
72
|
|
73
|
+
# Elements which, when removed, should have their contents surrounded by
|
74
|
+
# space characters to preserve readability. For example,
|
75
|
+
# `foo<div>bar</div>baz` will become 'foo bar baz' when the <div> is
|
76
|
+
# removed.
|
77
|
+
:whitespace_elements => %w[
|
78
|
+
address article aside blockquote br dd div dl dt footer h1 h2 h3 h4 h5
|
79
|
+
h6 header hgroup hr li nav ol p pre section ul
|
80
|
+
]
|
74
81
|
}
|
75
82
|
end
|
76
83
|
end
|
data/lib/sanitize/version.rb
CHANGED
data/lib/sanitize.rb
CHANGED
@@ -72,9 +72,12 @@ class Sanitize
|
|
72
72
|
@config = Config::DEFAULT.merge(config)
|
73
73
|
@config[:transformers] = Array(@config[:transformers].dup)
|
74
74
|
|
75
|
-
# Convert
|
76
|
-
@allowed_elements
|
75
|
+
# Convert arrays to hashes for faster lookups.
|
76
|
+
@allowed_elements = {}
|
77
|
+
@whitespace_elements = {}
|
78
|
+
|
77
79
|
@config[:elements].each {|el| @allowed_elements[el] = true }
|
80
|
+
@config[:whitespace_elements].each {|el| @whitespace_elements[el] = true }
|
78
81
|
|
79
82
|
# Convert the list of :remove_contents elements to a Hash for faster lookup.
|
80
83
|
@remove_all_contents = false
|
@@ -157,6 +160,13 @@ class Sanitize
|
|
157
160
|
|
158
161
|
# Delete any element that isn't in the whitelist.
|
159
162
|
unless transform[:whitelist] || @allowed_elements[name]
|
163
|
+
# Elements like br, div, p, etc. need to be replaced with whitespace in
|
164
|
+
# order to preserve readability.
|
165
|
+
if @whitespace_elements[name]
|
166
|
+
node.add_previous_sibling(' ')
|
167
|
+
node.add_next_sibling(' ') unless node.children.empty?
|
168
|
+
end
|
169
|
+
|
160
170
|
unless @remove_all_contents || @remove_element_contents[name]
|
161
171
|
node.children.each { |n| node.add_previous_sibling(n) }
|
162
172
|
end
|
metadata
CHANGED
@@ -4,11 +4,11 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: true
|
5
5
|
segments:
|
6
6
|
- 1
|
7
|
-
-
|
8
|
-
-
|
7
|
+
- 3
|
8
|
+
- 0
|
9
9
|
- dev
|
10
|
-
-
|
11
|
-
version: 1.2.2.dev.20101118
|
10
|
+
- 20101210
|
11
|
+
version: 1.3.0.dev.20101210
|
12
12
|
platform: ruby
|
13
13
|
authors:
|
14
14
|
- Ryan Grove
|
@@ -16,7 +16,7 @@ autorequire:
|
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date: 2010-
|
19
|
+
date: 2010-12-10 00:00:00 -08:00
|
20
20
|
default_executable:
|
21
21
|
dependencies:
|
22
22
|
- !ruby/object:Gem::Dependency
|
@@ -35,7 +35,7 @@ dependencies:
|
|
35
35
|
type: :runtime
|
36
36
|
version_requirements: *id001
|
37
37
|
- !ruby/object:Gem::Dependency
|
38
|
-
name:
|
38
|
+
name: minitest
|
39
39
|
prerelease: false
|
40
40
|
requirement: &id002 !ruby/object:Gem::Requirement
|
41
41
|
none: false
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
- - ~>
|
44
44
|
- !ruby/object:Gem::Version
|
45
45
|
segments:
|
46
|
-
-
|
47
|
-
-
|
46
|
+
- 2
|
47
|
+
- 0
|
48
48
|
- 0
|
49
|
-
version:
|
49
|
+
version: 2.0.0
|
50
50
|
type: :development
|
51
51
|
version_requirements: *id002
|
52
52
|
- !ruby/object:Gem::Dependency
|
@@ -83,7 +83,7 @@ files:
|
|
83
83
|
- lib/sanitize/version.rb
|
84
84
|
- lib/sanitize.rb
|
85
85
|
has_rdoc: true
|
86
|
-
homepage:
|
86
|
+
homepage: https://github.com/rgrove/sanitize/
|
87
87
|
licenses: []
|
88
88
|
|
89
89
|
post_install_message:
|