html-pipeline 1.4.0 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/html/pipeline/sanitization_filter.rb +18 -5
- data/lib/html/pipeline/version.rb +1 -1
- data/test/html/pipeline/sanitization_filter_test.rb +59 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 283d2eec685e3f57e423f5ef348389a7f5d29f90
|
4
|
+
data.tar.gz: a007485225e182efd0a158bf59c9e4df08da49f6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 98e70609b064b690455547da4f61f9746f36fd3fc2b06ea8e4e5a1cd15fb75b3294d466c2336f36b3c650c1cca298cabd430fb39a65cbbf92040e1adb8ef0815
|
7
|
+
data.tar.gz: 24ccfcf8af8ce40388ece52e15bfc0eb3a81b699f8d8043e420f050b1fdba8b1bae3b4df9ecc5c3181dcad4319b4e81bcca542ee3c87dce92ea8189a6e3eb960
|
data/CHANGELOG.md
CHANGED
(diff body for this file was not captured)
data/lib/html/pipeline/sanitization_filter.rb
CHANGED
@@ -15,9 +15,13 @@ module HTML
|
|
15
15
|
# https://github.com/rgrove/sanitize/#readme
|
16
16
|
#
|
17
17
|
# Context options:
|
18
|
-
# :whitelist
|
19
|
-
#
|
20
|
-
#
|
18
|
+
# :whitelist - The sanitizer whitelist configuration to use. This
|
19
|
+
# can be one of the options constants defined in this
|
20
|
+
# class or a custom sanitize options hash.
|
21
|
+
# :anchor_schemes - The URL schemes to allow in <a href> attributes. The
|
22
|
+
# default set is provided in the ANCHOR_SCHEMES
|
23
|
+
# constant in this class. If passed, this overrides any
|
24
|
+
# schemes specified in the whitelist configuration.
|
21
25
|
#
|
22
26
|
# This filter does not write additional information to the context.
|
23
27
|
class SanitizationFilter < Filter
|
@@ -32,6 +36,9 @@ module HTML
|
|
32
36
|
TABLE = 'table'.freeze
|
33
37
|
TABLE_SECTIONS = Set.new(%w(thead tbody tfoot).freeze)
|
34
38
|
|
39
|
+
# These schemes are the only ones allowed in <a href> attributes by default.
|
40
|
+
ANCHOR_SCHEMES = ['http', 'https', 'mailto', :relative, 'github-windows', 'github-mac'].freeze
|
41
|
+
|
35
42
|
# The main sanitization whitelist. Only these elements and attributes are
|
36
43
|
# allowed through by default.
|
37
44
|
WHITELIST = {
|
@@ -64,7 +71,7 @@ module HTML
|
|
64
71
|
'vspace', 'width', 'itemprop']
|
65
72
|
},
|
66
73
|
:protocols => {
|
67
|
-
'a' => {'href' => ['http', 'https', 'mailto', :relative, 'github-windows', 'github-mac']},
|
74
|
+
'a' => {'href' => ANCHOR_SCHEMES},
|
68
75
|
'img' => {'src' => ['http', 'https', :relative]}
|
69
76
|
},
|
70
77
|
:transformers => [
|
@@ -104,7 +111,13 @@ module HTML
|
|
104
111
|
# The whitelist to use when sanitizing. This can be passed in the context
|
105
112
|
# hash to the filter but defaults to WHITELIST constant value above.
|
106
113
|
def whitelist
|
107
|
-
context[:whitelist] || WHITELIST
|
114
|
+
whitelist = context[:whitelist] || WHITELIST
|
115
|
+
anchor_schemes = context[:anchor_schemes]
|
116
|
+
return whitelist unless anchor_schemes
|
117
|
+
whitelist = whitelist.dup
|
118
|
+
whitelist[:protocols] = (whitelist[:protocols] || {}).dup
|
119
|
+
whitelist[:protocols]['a'] = (whitelist[:protocols]['a'] || {}).merge('href' => anchor_schemes)
|
120
|
+
whitelist
|
108
121
|
end
|
109
122
|
end
|
110
123
|
end
|
data/test/html/pipeline/sanitization_filter_test.rb
CHANGED
@@ -45,6 +45,65 @@ class HTML::Pipeline::SanitizationFilterTest < Test::Unit::TestCase
|
|
45
45
|
assert_equal stuff, SanitizationFilter.call(stuff).to_s
|
46
46
|
end
|
47
47
|
|
48
|
+
def test_unknown_schemes_are_removed
|
49
|
+
stuff = '<a href="something-weird://heyyy">Wat</a> is this'
|
50
|
+
html = SanitizationFilter.call(stuff).to_s
|
51
|
+
assert_equal '<a>Wat</a> is this', html
|
52
|
+
end
|
53
|
+
|
54
|
+
def test_standard_schemes_are_removed_if_not_specified_in_anchor_schemes
|
55
|
+
stuff = '<a href="http://www.example.com/">No href for you</a>'
|
56
|
+
filter = SanitizationFilter.new(stuff, {:anchor_schemes => []})
|
57
|
+
html = filter.call.to_s
|
58
|
+
assert_equal '<a>No href for you</a>', html
|
59
|
+
end
|
60
|
+
|
61
|
+
def test_custom_anchor_schemes_are_not_removed
|
62
|
+
stuff = '<a href="something-weird://heyyy">Wat</a> is this'
|
63
|
+
filter = SanitizationFilter.new(stuff, {:anchor_schemes => ['something-weird']})
|
64
|
+
html = filter.call.to_s
|
65
|
+
assert_equal stuff, html
|
66
|
+
end
|
67
|
+
|
68
|
+
def test_anchor_schemes_are_merged_with_other_anchor_restrictions
|
69
|
+
stuff = '<a href="something-weird://heyyy" ping="more-weird://hiii">Wat</a> is this'
|
70
|
+
whitelist = {
|
71
|
+
:elements => ['a'],
|
72
|
+
:attributes => {'a' => ['href', 'ping']},
|
73
|
+
:protocols => {'a' => {'ping' => ['http']}}
|
74
|
+
}
|
75
|
+
filter = SanitizationFilter.new(stuff, {:whitelist => whitelist, :anchor_schemes => ['something-weird']})
|
76
|
+
html = filter.call.to_s
|
77
|
+
assert_equal '<a href="something-weird://heyyy">Wat</a> is this', html
|
78
|
+
end
|
79
|
+
|
80
|
+
def test_uses_anchor_schemes_from_whitelist_when_not_separately_specified
|
81
|
+
stuff = '<a href="something-weird://heyyy">Wat</a> is this'
|
82
|
+
whitelist = {
|
83
|
+
:elements => ['a'],
|
84
|
+
:attributes => {'a' => ['href']},
|
85
|
+
:protocols => {'a' => {'href' => ['something-weird']}}
|
86
|
+
}
|
87
|
+
filter = SanitizationFilter.new(stuff, {:whitelist => whitelist})
|
88
|
+
html = filter.call.to_s
|
89
|
+
assert_equal stuff, html
|
90
|
+
end
|
91
|
+
|
92
|
+
def test_whitelist_contains_default_anchor_schemes
|
93
|
+
assert_equal SanitizationFilter::WHITELIST[:protocols]['a']['href'], ['http', 'https', 'mailto', :relative, 'github-windows', 'github-mac']
|
94
|
+
end
|
95
|
+
|
96
|
+
def test_whitelist_from_full_constant
|
97
|
+
stuff = '<a href="something-weird://heyyy" ping="more-weird://hiii">Wat</a> is this'
|
98
|
+
filter = SanitizationFilter.new(stuff, :whitelist => SanitizationFilter::FULL)
|
99
|
+
html = filter.call.to_s
|
100
|
+
assert_equal 'Wat is this', html
|
101
|
+
end
|
102
|
+
|
103
|
+
def test_exports_default_anchor_schemes
|
104
|
+
assert_equal SanitizationFilter::ANCHOR_SCHEMES, ['http', 'https', 'mailto', :relative, 'github-windows', 'github-mac']
|
105
|
+
end
|
106
|
+
|
48
107
|
def test_script_contents_are_removed
|
49
108
|
orig = '<script>JavaScript!</script>'
|
50
109
|
assert_equal "", SanitizationFilter.call(orig).to_s
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html-pipeline
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.0
|
4
|
+
version: 1.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Tomayko
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-01-
|
12
|
+
date: 2014-01-22 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|