html-pipeline 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/html/pipeline/sanitization_filter.rb +18 -5
- data/lib/html/pipeline/version.rb +1 -1
- data/test/html/pipeline/sanitization_filter_test.rb +59 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 283d2eec685e3f57e423f5ef348389a7f5d29f90
|
4
|
+
data.tar.gz: a007485225e182efd0a158bf59c9e4df08da49f6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 98e70609b064b690455547da4f61f9746f36fd3fc2b06ea8e4e5a1cd15fb75b3294d466c2336f36b3c650c1cca298cabd430fb39a65cbbf92040e1adb8ef0815
|
7
|
+
data.tar.gz: 24ccfcf8af8ce40388ece52e15bfc0eb3a81b699f8d8043e420f050b1fdba8b1bae3b4df9ecc5c3181dcad4319b4e81bcca542ee3c87dce92ea8189a6e3eb960
|
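data/CHANGELOG.md
CHANGED
(The +4 -0 hunk for this file is missing from this extract.)
data/lib/html/pipeline/sanitization_filter.rb
CHANGED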
@@ -15,9 +15,13 @@ module HTML
|
|
15
15
|
# https://github.com/rgrove/sanitize/#readme
|
16
16
|
#
|
17
17
|
# Context options:
|
18
|
-
# :whitelist
|
19
|
-
#
|
20
|
-
#
|
18
|
+
# :whitelist - The sanitizer whitelist configuration to use. This
|
19
|
+
# can be one of the options constants defined in this
|
20
|
+
# class or a custom sanitize options hash.
|
21
|
+
# :anchor_schemes - The URL schemes to allow in <a href> attributes. The
|
22
|
+
# default set is provided in the ANCHOR_SCHEMES
|
23
|
+
# constant in this class. If passed, this overrides any
|
24
|
+
# schemes specified in the whitelist configuration.
|
21
25
|
#
|
22
26
|
# This filter does not write additional information to the context.
|
23
27
|
class SanitizationFilter < Filter
|
@@ -32,6 +36,9 @@ module HTML
|
|
32
36
|
TABLE = 'table'.freeze
|
33
37
|
TABLE_SECTIONS = Set.new(%w(thead tbody tfoot).freeze)
|
34
38
|
|
39
|
+
# These schemes are the only ones allowed in <a href> attributes by default.
|
40
|
+
ANCHOR_SCHEMES = ['http', 'https', 'mailto', :relative, 'github-windows', 'github-mac'].freeze
|
41
|
+
|
35
42
|
# The main sanitization whitelist. Only these elements and attributes are
|
36
43
|
# allowed through by default.
|
37
44
|
WHITELIST = {
|
@@ -64,7 +71,7 @@ module HTML
|
|
64
71
|
'vspace', 'width', 'itemprop']
|
65
72
|
},
|
66
73
|
:protocols => {
|
67
|
-
'a' => {'href' => ['http', 'https', 'mailto', :relative, 'github-windows', 'github-mac']},
|
74
|
+
'a' => {'href' => ANCHOR_SCHEMES},
|
68
75
|
'img' => {'src' => ['http', 'https', :relative]}
|
69
76
|
},
|
70
77
|
:transformers => [
|
@@ -104,7 +111,13 @@ module HTML
|
|
104
111
|
# The whitelist to use when sanitizing. This can be passed in the context
|
105
112
|
# hash to the filter but defaults to WHITELIST constant value above.
|
106
113
|
def whitelist
|
107
|
-
context[:whitelist] || WHITELIST
|
114
|
+
whitelist = context[:whitelist] || WHITELIST
|
115
|
+
anchor_schemes = context[:anchor_schemes]
|
116
|
+
return whitelist unless anchor_schemes
|
117
|
+
whitelist = whitelist.dup
|
118
|
+
whitelist[:protocols] = (whitelist[:protocols] || {}).dup
|
119
|
+
whitelist[:protocols]['a'] = (whitelist[:protocols]['a'] || {}).merge('href' => anchor_schemes)
|
120
|
+
whitelist
|
108
121
|
end
|
109
122
|
end
|
110
123
|
end
|
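data/lib/html/pipeline/version.rb
CHANGED
(The +1 -1 hunk for this file is missing from this extract.)
data/test/html/pipeline/sanitization_filter_test.rb
CHANGED
@@ -45,6 +45,65 @@ class HTML::Pipeline::SanitizationFilterTest < Test::Unit::TestCase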
|
|
45
45
|
assert_equal stuff, SanitizationFilter.call(stuff).to_s
|
46
46
|
end
|
47
47
|
|
48
|
+
def test_unknown_schemes_are_removed
|
49
|
+
stuff = '<a href="something-weird://heyyy">Wat</a> is this'
|
50
|
+
html = SanitizationFilter.call(stuff).to_s
|
51
|
+
assert_equal '<a>Wat</a> is this', html
|
52
|
+
end
|
53
|
+
|
54
|
+
def test_standard_schemes_are_removed_if_not_specified_in_anchor_schemes
|
55
|
+
stuff = '<a href="http://www.example.com/">No href for you</a>'
|
56
|
+
filter = SanitizationFilter.new(stuff, {:anchor_schemes => []})
|
57
|
+
html = filter.call.to_s
|
58
|
+
assert_equal '<a>No href for you</a>', html
|
59
|
+
end
|
60
|
+
|
61
|
+
def test_custom_anchor_schemes_are_not_removed
|
62
|
+
stuff = '<a href="something-weird://heyyy">Wat</a> is this'
|
63
|
+
filter = SanitizationFilter.new(stuff, {:anchor_schemes => ['something-weird']})
|
64
|
+
html = filter.call.to_s
|
65
|
+
assert_equal stuff, html
|
66
|
+
end
|
67
|
+
|
68
|
+
def test_anchor_schemes_are_merged_with_other_anchor_restrictions
|
69
|
+
stuff = '<a href="something-weird://heyyy" ping="more-weird://hiii">Wat</a> is this'
|
70
|
+
whitelist = {
|
71
|
+
:elements => ['a'],
|
72
|
+
:attributes => {'a' => ['href', 'ping']},
|
73
|
+
:protocols => {'a' => {'ping' => ['http']}}
|
74
|
+
}
|
75
|
+
filter = SanitizationFilter.new(stuff, {:whitelist => whitelist, :anchor_schemes => ['something-weird']})
|
76
|
+
html = filter.call.to_s
|
77
|
+
assert_equal '<a href="something-weird://heyyy">Wat</a> is this', html
|
78
|
+
end
|
79
|
+
|
80
|
+
def test_uses_anchor_schemes_from_whitelist_when_not_separately_specified
|
81
|
+
stuff = '<a href="something-weird://heyyy">Wat</a> is this'
|
82
|
+
whitelist = {
|
83
|
+
:elements => ['a'],
|
84
|
+
:attributes => {'a' => ['href']},
|
85
|
+
:protocols => {'a' => {'href' => ['something-weird']}}
|
86
|
+
}
|
87
|
+
filter = SanitizationFilter.new(stuff, {:whitelist => whitelist})
|
88
|
+
html = filter.call.to_s
|
89
|
+
assert_equal stuff, html
|
90
|
+
end
|
91
|
+
|
92
|
+
def test_whitelist_contains_default_anchor_schemes
|
93
|
+
assert_equal SanitizationFilter::WHITELIST[:protocols]['a']['href'], ['http', 'https', 'mailto', :relative, 'github-windows', 'github-mac']
|
94
|
+
end
|
95
|
+
|
96
|
+
def test_whitelist_from_full_constant
|
97
|
+
stuff = '<a href="something-weird://heyyy" ping="more-weird://hiii">Wat</a> is this'
|
98
|
+
filter = SanitizationFilter.new(stuff, :whitelist => SanitizationFilter::FULL)
|
99
|
+
html = filter.call.to_s
|
100
|
+
assert_equal 'Wat is this', html
|
101
|
+
end
|
102
|
+
|
103
|
+
def test_exports_default_anchor_schemes
|
104
|
+
assert_equal SanitizationFilter::ANCHOR_SCHEMES, ['http', 'https', 'mailto', :relative, 'github-windows', 'github-mac']
|
105
|
+
end
|
106
|
+
|
48
107
|
def test_script_contents_are_removed
|
49
108
|
orig = '<script>JavaScript!</script>'
|
50
109
|
assert_equal "", SanitizationFilter.call(orig).to_s
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html-pipeline
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.0
|
4
|
+
version: 1.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Tomayko
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-01-
|
12
|
+
date: 2014-01-22 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|