html-pipeline 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 18ebd2d23ad0e2b826d0f799544324f75efad1c4
4
- data.tar.gz: c052603aa7e7dd3af1b9b75f55a52ad4be278fbf
3
+ metadata.gz: 283d2eec685e3f57e423f5ef348389a7f5d29f90
4
+ data.tar.gz: a007485225e182efd0a158bf59c9e4df08da49f6
5
5
  SHA512:
6
- metadata.gz: f5177792a0de9b78ccdfeb86c2b335ed25dcc83a2e10db7c8fa34236ec61e6746fb358c593fc156739c338144332e7e6f193300a6399485e1ea5210af3424ff2
7
- data.tar.gz: 905067a669f37813c52858536ebda8af639ba6b3b1929b361acd4f9460d06881f922db57f40033f2eb15b7eb061a31b365d497e0b19b6863ebbf42401bc7dc35
6
+ metadata.gz: 98e70609b064b690455547da4f61f9746f36fd3fc2b06ea8e4e5a1cd15fb75b3294d466c2336f36b3c650c1cca298cabd430fb39a65cbbf92040e1adb8ef0815
7
+ data.tar.gz: 24ccfcf8af8ce40388ece52e15bfc0eb3a81b699f8d8043e420f050b1fdba8b1bae3b4df9ecc5c3181dcad4319b4e81bcca542ee3c87dce92ea8189a6e3eb960
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # CHANGELOG
2
2
 
3
+ ## 1.5.0
4
+
5
+ * More flexible whitelist configuration for SanitizationFilter #98 aroben
6
+
3
7
  ## 1.4.0
4
8
 
5
9
  * Fix CamoFilter double entity encoding. #101 josh
@@ -15,9 +15,13 @@ module HTML
15
15
  # https://github.com/rgrove/sanitize/#readme
16
16
  #
17
17
  # Context options:
18
- # :whitelist - The sanitizer whitelist configuration to use. This can be one
19
- # of the options constants defined in this class or a custom
20
- # sanitize options hash.
18
+ # :whitelist - The sanitizer whitelist configuration to use. This
19
+ # can be one of the options constants defined in this
20
+ # class or a custom sanitize options hash.
21
+ # :anchor_schemes - The URL schemes to allow in <a href> attributes. The
22
+ # default set is provided in the ANCHOR_SCHEMES
23
+ # constant in this class. If passed, this overrides any
24
+ # schemes specified in the whitelist configuration.
21
25
  #
22
26
  # This filter does not write additional information to the context.
23
27
  class SanitizationFilter < Filter
@@ -32,6 +36,9 @@ module HTML
32
36
  TABLE = 'table'.freeze
33
37
  TABLE_SECTIONS = Set.new(%w(thead tbody tfoot).freeze)
34
38
 
39
+ # These schemes are the only ones allowed in <a href> attributes by default.
40
+ ANCHOR_SCHEMES = ['http', 'https', 'mailto', :relative, 'github-windows', 'github-mac'].freeze
41
+
35
42
  # The main sanitization whitelist. Only these elements and attributes are
36
43
  # allowed through by default.
37
44
  WHITELIST = {
@@ -64,7 +71,7 @@ module HTML
64
71
  'vspace', 'width', 'itemprop']
65
72
  },
66
73
  :protocols => {
67
- 'a' => {'href' => ['http', 'https', 'mailto', :relative, 'github-windows', 'github-mac']},
74
+ 'a' => {'href' => ANCHOR_SCHEMES},
68
75
  'img' => {'src' => ['http', 'https', :relative]}
69
76
  },
70
77
  :transformers => [
@@ -104,7 +111,13 @@ module HTML
104
111
  # The whitelist to use when sanitizing. This can be passed in the context
105
112
  # hash to the filter but defaults to WHITELIST constant value above.
106
113
  def whitelist
107
- context[:whitelist] || WHITELIST
114
+ whitelist = context[:whitelist] || WHITELIST
115
+ anchor_schemes = context[:anchor_schemes]
116
+ return whitelist unless anchor_schemes
117
+ whitelist = whitelist.dup
118
+ whitelist[:protocols] = (whitelist[:protocols] || {}).dup
119
+ whitelist[:protocols]['a'] = (whitelist[:protocols]['a'] || {}).merge('href' => anchor_schemes)
120
+ whitelist
108
121
  end
109
122
  end
110
123
  end
@@ -1,5 +1,5 @@
1
1
  module HTML
2
2
  class Pipeline
3
- VERSION = "1.4.0"
3
+ VERSION = "1.5.0"
4
4
  end
5
5
  end
@@ -45,6 +45,65 @@ class HTML::Pipeline::SanitizationFilterTest < Test::Unit::TestCase
45
45
  assert_equal stuff, SanitizationFilter.call(stuff).to_s
46
46
  end
47
47
 
48
+ def test_unknown_schemes_are_removed
49
+ stuff = '<a href="something-weird://heyyy">Wat</a> is this'
50
+ html = SanitizationFilter.call(stuff).to_s
51
+ assert_equal '<a>Wat</a> is this', html
52
+ end
53
+
54
+ def test_standard_schemes_are_removed_if_not_specified_in_anchor_schemes
55
+ stuff = '<a href="http://www.example.com/">No href for you</a>'
56
+ filter = SanitizationFilter.new(stuff, {:anchor_schemes => []})
57
+ html = filter.call.to_s
58
+ assert_equal '<a>No href for you</a>', html
59
+ end
60
+
61
+ def test_custom_anchor_schemes_are_not_removed
62
+ stuff = '<a href="something-weird://heyyy">Wat</a> is this'
63
+ filter = SanitizationFilter.new(stuff, {:anchor_schemes => ['something-weird']})
64
+ html = filter.call.to_s
65
+ assert_equal stuff, html
66
+ end
67
+
68
+ def test_anchor_schemes_are_merged_with_other_anchor_restrictions
69
+ stuff = '<a href="something-weird://heyyy" ping="more-weird://hiii">Wat</a> is this'
70
+ whitelist = {
71
+ :elements => ['a'],
72
+ :attributes => {'a' => ['href', 'ping']},
73
+ :protocols => {'a' => {'ping' => ['http']}}
74
+ }
75
+ filter = SanitizationFilter.new(stuff, {:whitelist => whitelist, :anchor_schemes => ['something-weird']})
76
+ html = filter.call.to_s
77
+ assert_equal '<a href="something-weird://heyyy">Wat</a> is this', html
78
+ end
79
+
80
+ def test_uses_anchor_schemes_from_whitelist_when_not_separately_specified
81
+ stuff = '<a href="something-weird://heyyy">Wat</a> is this'
82
+ whitelist = {
83
+ :elements => ['a'],
84
+ :attributes => {'a' => ['href']},
85
+ :protocols => {'a' => {'href' => ['something-weird']}}
86
+ }
87
+ filter = SanitizationFilter.new(stuff, {:whitelist => whitelist})
88
+ html = filter.call.to_s
89
+ assert_equal stuff, html
90
+ end
91
+
92
+ def test_whitelist_contains_default_anchor_schemes
93
+ assert_equal SanitizationFilter::WHITELIST[:protocols]['a']['href'], ['http', 'https', 'mailto', :relative, 'github-windows', 'github-mac']
94
+ end
95
+
96
+ def test_whitelist_from_full_constant
97
+ stuff = '<a href="something-weird://heyyy" ping="more-weird://hiii">Wat</a> is this'
98
+ filter = SanitizationFilter.new(stuff, :whitelist => SanitizationFilter::FULL)
99
+ html = filter.call.to_s
100
+ assert_equal 'Wat is this', html
101
+ end
102
+
103
+ def test_exports_default_anchor_schemes
104
+ assert_equal SanitizationFilter::ANCHOR_SCHEMES, ['http', 'https', 'mailto', :relative, 'github-windows', 'github-mac']
105
+ end
106
+
48
107
  def test_script_contents_are_removed
49
108
  orig = '<script>JavaScript!</script>'
50
109
  assert_equal "", SanitizationFilter.call(orig).to_s
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-pipeline
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Tomayko
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-01-21 00:00:00.000000000 Z
12
+ date: 2014-01-22 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri