html-pipeline 1.4.0 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 18ebd2d23ad0e2b826d0f799544324f75efad1c4
4
- data.tar.gz: c052603aa7e7dd3af1b9b75f55a52ad4be278fbf
3
+ metadata.gz: 283d2eec685e3f57e423f5ef348389a7f5d29f90
4
+ data.tar.gz: a007485225e182efd0a158bf59c9e4df08da49f6
5
5
  SHA512:
6
- metadata.gz: f5177792a0de9b78ccdfeb86c2b335ed25dcc83a2e10db7c8fa34236ec61e6746fb358c593fc156739c338144332e7e6f193300a6399485e1ea5210af3424ff2
7
- data.tar.gz: 905067a669f37813c52858536ebda8af639ba6b3b1929b361acd4f9460d06881f922db57f40033f2eb15b7eb061a31b365d497e0b19b6863ebbf42401bc7dc35
6
+ metadata.gz: 98e70609b064b690455547da4f61f9746f36fd3fc2b06ea8e4e5a1cd15fb75b3294d466c2336f36b3c650c1cca298cabd430fb39a65cbbf92040e1adb8ef0815
7
+ data.tar.gz: 24ccfcf8af8ce40388ece52e15bfc0eb3a81b699f8d8043e420f050b1fdba8b1bae3b4df9ecc5c3181dcad4319b4e81bcca542ee3c87dce92ea8189a6e3eb960
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # CHANGELOG
2
2
 
3
+ ## 1.5.0
4
+
5
+ * More flexible whitelist configuration for SanitizationFilter #98 aroben
6
+
3
7
  ## 1.4.0
4
8
 
5
9
  * Fix CamoFilter double entity encoding. #101 josh
@@ -15,9 +15,13 @@ module HTML
15
15
  # https://github.com/rgrove/sanitize/#readme
16
16
  #
17
17
  # Context options:
18
- # :whitelist - The sanitizer whitelist configuration to use. This can be one
19
- # of the options constants defined in this class or a custom
20
- # sanitize options hash.
18
+ # :whitelist - The sanitizer whitelist configuration to use. This
19
+ # can be one of the options constants defined in this
20
+ # class or a custom sanitize options hash.
21
+ # :anchor_schemes - The URL schemes to allow in <a href> attributes. The
22
+ # default set is provided in the ANCHOR_SCHEMES
23
+ # constant in this class. If passed, this overrides any
24
+ # schemes specified in the whitelist configuration.
21
25
  #
22
26
  # This filter does not write additional information to the context.
23
27
  class SanitizationFilter < Filter
@@ -32,6 +36,9 @@ module HTML
32
36
  TABLE = 'table'.freeze
33
37
  TABLE_SECTIONS = Set.new(%w(thead tbody tfoot).freeze)
34
38
 
39
+ # These schemes are the only ones allowed in <a href> attributes by default.
40
+ ANCHOR_SCHEMES = ['http', 'https', 'mailto', :relative, 'github-windows', 'github-mac'].freeze
41
+
35
42
  # The main sanitization whitelist. Only these elements and attributes are
36
43
  # allowed through by default.
37
44
  WHITELIST = {
@@ -64,7 +71,7 @@ module HTML
64
71
  'vspace', 'width', 'itemprop']
65
72
  },
66
73
  :protocols => {
67
- 'a' => {'href' => ['http', 'https', 'mailto', :relative, 'github-windows', 'github-mac']},
74
+ 'a' => {'href' => ANCHOR_SCHEMES},
68
75
  'img' => {'src' => ['http', 'https', :relative]}
69
76
  },
70
77
  :transformers => [
@@ -104,7 +111,13 @@ module HTML
104
111
  # The whitelist to use when sanitizing. This can be passed in the context
105
112
  # hash to the filter but defaults to the WHITELIST constant value above.
106
113
  def whitelist
107
- context[:whitelist] || WHITELIST
114
+ whitelist = context[:whitelist] || WHITELIST
115
+ anchor_schemes = context[:anchor_schemes]
116
+ return whitelist unless anchor_schemes
117
+ whitelist = whitelist.dup
118
+ whitelist[:protocols] = (whitelist[:protocols] || {}).dup
119
+ whitelist[:protocols]['a'] = (whitelist[:protocols]['a'] || {}).merge('href' => anchor_schemes)
120
+ whitelist
108
121
  end
109
122
  end
110
123
  end
@@ -1,5 +1,5 @@
1
1
  module HTML
2
2
  class Pipeline
3
- VERSION = "1.4.0"
3
+ VERSION = "1.5.0"
4
4
  end
5
5
  end
@@ -45,6 +45,65 @@ class HTML::Pipeline::SanitizationFilterTest < Test::Unit::TestCase
45
45
  assert_equal stuff, SanitizationFilter.call(stuff).to_s
46
46
  end
47
47
 
48
+ def test_unknown_schemes_are_removed
49
+ stuff = '<a href="something-weird://heyyy">Wat</a> is this'
50
+ html = SanitizationFilter.call(stuff).to_s
51
+ assert_equal '<a>Wat</a> is this', html
52
+ end
53
+
54
+ def test_standard_schemes_are_removed_if_not_specified_in_anchor_schemes
55
+ stuff = '<a href="http://www.example.com/">No href for you</a>'
56
+ filter = SanitizationFilter.new(stuff, {:anchor_schemes => []})
57
+ html = filter.call.to_s
58
+ assert_equal '<a>No href for you</a>', html
59
+ end
60
+
61
+ def test_custom_anchor_schemes_are_not_removed
62
+ stuff = '<a href="something-weird://heyyy">Wat</a> is this'
63
+ filter = SanitizationFilter.new(stuff, {:anchor_schemes => ['something-weird']})
64
+ html = filter.call.to_s
65
+ assert_equal stuff, html
66
+ end
67
+
68
+ def test_anchor_schemes_are_merged_with_other_anchor_restrictions
69
+ stuff = '<a href="something-weird://heyyy" ping="more-weird://hiii">Wat</a> is this'
70
+ whitelist = {
71
+ :elements => ['a'],
72
+ :attributes => {'a' => ['href', 'ping']},
73
+ :protocols => {'a' => {'ping' => ['http']}}
74
+ }
75
+ filter = SanitizationFilter.new(stuff, {:whitelist => whitelist, :anchor_schemes => ['something-weird']})
76
+ html = filter.call.to_s
77
+ assert_equal '<a href="something-weird://heyyy">Wat</a> is this', html
78
+ end
79
+
80
+ def test_uses_anchor_schemes_from_whitelist_when_not_separately_specified
81
+ stuff = '<a href="something-weird://heyyy">Wat</a> is this'
82
+ whitelist = {
83
+ :elements => ['a'],
84
+ :attributes => {'a' => ['href']},
85
+ :protocols => {'a' => {'href' => ['something-weird']}}
86
+ }
87
+ filter = SanitizationFilter.new(stuff, {:whitelist => whitelist})
88
+ html = filter.call.to_s
89
+ assert_equal stuff, html
90
+ end
91
+
92
+ def test_whitelist_contains_default_anchor_schemes
93
+ assert_equal SanitizationFilter::WHITELIST[:protocols]['a']['href'], ['http', 'https', 'mailto', :relative, 'github-windows', 'github-mac']
94
+ end
95
+
96
+ def test_whitelist_from_full_constant
97
+ stuff = '<a href="something-weird://heyyy" ping="more-weird://hiii">Wat</a> is this'
98
+ filter = SanitizationFilter.new(stuff, :whitelist => SanitizationFilter::FULL)
99
+ html = filter.call.to_s
100
+ assert_equal 'Wat is this', html
101
+ end
102
+
103
+ def test_exports_default_anchor_schemes
104
+ assert_equal SanitizationFilter::ANCHOR_SCHEMES, ['http', 'https', 'mailto', :relative, 'github-windows', 'github-mac']
105
+ end
106
+
48
107
  def test_script_contents_are_removed
49
108
  orig = '<script>JavaScript!</script>'
50
109
  assert_equal "", SanitizationFilter.call(orig).to_s
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-pipeline
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Tomayko
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-01-21 00:00:00.000000000 Z
12
+ date: 2014-01-22 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri