jets-html-sanitizer 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +7 -0
- data/MIT-LICENSE +48 -0
- data/README.md +142 -0
- data/lib/jets-html-sanitizer.rb +73 -0
- data/lib/jets/html/sanitizer.rb +152 -0
- data/lib/jets/html/sanitizer/version.rb +7 -0
- data/lib/jets/html/scrubbers.rb +201 -0
- data/test/sanitizer_test.rb +564 -0
- data/test/scrubbers_test.rb +174 -0
- metadata +131 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: e9ab1d5a11318b93b5438b666e96563d7bf6b631dbd5735950af5f601311f3b4
|
4
|
+
data.tar.gz: 766430d915e2406a730c721d82c92f7cc7982e99c2d05474f293c4f784ae20ee
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 7b9a8fcc55ca5f820637f470285f856f4bb6a7ec21c93ed3a768ad53278a1916696ce925542482fe3d91fcf331ff45b02c7e3e51a09e8a9b5f8e29fdfcecdff3
|
7
|
+
data.tar.gz: 42b089a1e9c5a7afb518554ef94dff8153e57a1f38f9ea97a04c5d947a39081de418e0a38817eb57a6c7b66616207db5ce977e8ed30ba6b71d93d02fe5eace36
|
data/CHANGELOG.md
ADDED
data/MIT-LICENSE
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
Copyright (c) 2019 Tung Nguyen
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
|
24
|
+
# Original Authors
|
25
|
+
|
26
|
+
Copyright (c) 2013-2015 Rafael Mendonça França, Kasper Timm Hansen
|
27
|
+
|
28
|
+
MIT License
|
29
|
+
|
30
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
31
|
+
a copy of this software and associated documentation files (the
|
32
|
+
"Software"), to deal in the Software without restriction, including
|
33
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
34
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
35
|
+
permit persons to whom the Software is furnished to do so, subject to
|
36
|
+
the following conditions:
|
37
|
+
|
38
|
+
The above copyright notice and this permission notice shall be
|
39
|
+
included in all copies or substantial portions of the Software.
|
40
|
+
|
41
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
42
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
43
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
44
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
45
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
46
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
47
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
48
|
+
|
data/README.md
ADDED
@@ -0,0 +1,142 @@
|
|
1
|
+
# Jets Html Sanitizers
|
2
|
+
|
3
|
+
This is a fork of rails-html-sanitizer. The fork exists so that the namespace can live under `Jets`, avoiding naming collisions with `Rails`. Credit for the original work goes to the [rails-html-sanitizer](https://github.com/rails/rails-html-sanitizer) authors.
|
4
|
+
|
5
|
+
In Jets this gem will be responsible for sanitizing HTML fragments in Jets
|
6
|
+
applications, i.e. in the `sanitize`, `sanitize_css`, `strip_tags` and `strip_links` methods.
|
7
|
+
|
8
|
+
Jets Html Sanitizer is only intended to be used with Jets applications. If you need similar functionality in non Jets apps consider using [Loofah](https://github.com/flavorjones/loofah) directly (that's what handles sanitization under the hood).
|
9
|
+
|
10
|
+
## Installation
|
11
|
+
|
12
|
+
Add this line to your application's Gemfile:
|
13
|
+
|
14
|
+
gem 'jets-html-sanitizer'
|
15
|
+
|
16
|
+
And then execute:
|
17
|
+
|
18
|
+
$ bundle
|
19
|
+
|
20
|
+
Or install it yourself as:
|
21
|
+
|
22
|
+
$ gem install jets-html-sanitizer
|
23
|
+
|
24
|
+
## Usage
|
25
|
+
|
26
|
+
### Sanitizers
|
27
|
+
|
28
|
+
All sanitizers respond to `sanitize`.
|
29
|
+
|
30
|
+
#### FullSanitizer
|
31
|
+
|
32
|
+
```ruby
|
33
|
+
full_sanitizer = Jets::Html::FullSanitizer.new
|
34
|
+
full_sanitizer.sanitize("<b>Bold</b> no more! <a href='more.html'>See more here</a>...")
|
35
|
+
# => Bold no more! See more here...
|
36
|
+
```
|
37
|
+
|
38
|
+
#### LinkSanitizer
|
39
|
+
|
40
|
+
```ruby
|
41
|
+
link_sanitizer = Jets::Html::LinkSanitizer.new
|
42
|
+
link_sanitizer.sanitize('<a href="example.com">Only the link text will be kept.</a>')
|
43
|
+
# => Only the link text will be kept.
|
44
|
+
```
|
45
|
+
|
46
|
+
#### WhiteListSanitizer
|
47
|
+
|
48
|
+
```ruby
|
49
|
+
white_list_sanitizer = Jets::Html::WhiteListSanitizer.new
|
50
|
+
|
51
|
+
# sanitize via an extensive white list of allowed elements
|
52
|
+
white_list_sanitizer.sanitize(@article.body)
|
53
|
+
|
54
|
+
# white list only the supplied tags and attributes
|
55
|
+
white_list_sanitizer.sanitize(@article.body, tags: %w(table tr td), attributes: %w(id class style))
|
56
|
+
|
57
|
+
# white list via a custom scrubber
|
58
|
+
white_list_sanitizer.sanitize(@article.body, scrubber: ArticleScrubber.new)
|
59
|
+
|
60
|
+
# white list sanitizer can also sanitize css
|
61
|
+
white_list_sanitizer.sanitize_css('background-color: #000;')
|
62
|
+
```
|
63
|
+
|
64
|
+
### Scrubbers
|
65
|
+
|
66
|
+
Scrubbers are objects responsible for removing nodes or attributes you don't want in your HTML document.
|
67
|
+
|
68
|
+
This gem includes two scrubbers `Jets::Html::PermitScrubber` and `Jets::Html::TargetScrubber`.
|
69
|
+
|
70
|
+
#### `Jets::Html::PermitScrubber`
|
71
|
+
|
72
|
+
This scrubber allows you to permit only the tags and attributes you want.
|
73
|
+
|
74
|
+
```ruby
|
75
|
+
scrubber = Jets::Html::PermitScrubber.new
|
76
|
+
scrubber.tags = ['a']
|
77
|
+
|
78
|
+
html_fragment = Loofah.fragment('<a><img/ ></a>')
|
79
|
+
html_fragment.scrub!(scrubber)
|
80
|
+
html_fragment.to_s # => "<a></a>"
|
81
|
+
```
|
82
|
+
|
83
|
+
#### `Jets::Html::TargetScrubber`
|
84
|
+
|
85
|
+
Where `PermitScrubber` picks out tags and attributes to permit in sanitization,
|
86
|
+
`Jets::Html::TargetScrubber` targets them for removal.
|
87
|
+
|
88
|
+
|
89
|
+
```ruby
|
90
|
+
scrubber = Jets::Html::TargetScrubber.new
|
91
|
+
scrubber.tags = ['img']
|
92
|
+
|
93
|
+
html_fragment = Loofah.fragment('<a><img/ ></a>')
|
94
|
+
html_fragment.scrub!(scrubber)
|
95
|
+
html_fragment.to_s # => "<a></a>"
|
96
|
+
```
|
97
|
+
|
98
|
+
#### Custom Scrubbers
|
99
|
+
|
100
|
+
You can also create custom scrubbers in your application if you want to.
|
101
|
+
|
102
|
+
```ruby
|
103
|
+
class CommentScrubber < Jets::Html::PermitScrubber
|
104
|
+
def initialize
|
105
|
+
super
|
106
|
+
self.tags = %w( form script comment blockquote )
|
107
|
+
self.attributes = %w( style )
|
108
|
+
end
|
109
|
+
|
110
|
+
def skip_node?(node)
|
111
|
+
node.text?
|
112
|
+
end
|
113
|
+
end
|
114
|
+
```
|
115
|
+
|
116
|
+
See `Jets::Html::PermitScrubber` documentation to learn more about which methods can be overridden.
|
117
|
+
|
118
|
+
#### Custom Scrubber in a Jets app
|
119
|
+
|
120
|
+
Using the `CommentScrubber` from above, you can use this in a Jets view like so:
|
121
|
+
|
122
|
+
```ruby
|
123
|
+
<%= sanitize @comment, scrubber: CommentScrubber.new %>
|
124
|
+
```
|
125
|
+
|
126
|
+
## Read more
|
127
|
+
|
128
|
+
Loofah is what underlies the sanitizers and scrubbers of jets-html-sanitizer.
|
129
|
+
- [Loofah and Loofah Scrubbers](https://github.com/flavorjones/loofah)
|
130
|
+
|
131
|
+
The `node` argument passed to some methods in a custom scrubber is an instance of `Nokogiri::XML::Node`.
|
132
|
+
- [`Nokogiri::XML::Node`](http://nokogiri.org/Nokogiri/XML/Node.html)
|
133
|
+
- [Nokogiri](http://nokogiri.org)
|
134
|
+
|
135
|
+
## Contributing to Jets Html Sanitizers
|
136
|
+
|
137
|
+
Jets Html Sanitizers is the work of many contributors. You're encouraged to submit pull requests, propose features and discuss issues.
|
138
|
+
|
139
|
+
See [CONTRIBUTING](CONTRIBUTING.md).
|
140
|
+
|
141
|
+
## License
|
142
|
+
Jets Html Sanitizers is released under the [MIT License](MIT-LICENSE).
|
@@ -0,0 +1,73 @@
|
|
1
|
+
require "jets/html/sanitizer/version"
|
2
|
+
require "loofah"
|
3
|
+
require "jets/html/scrubbers"
|
4
|
+
require "jets/html/sanitizer"
|
5
|
+
|
6
|
+
module Jets
|
7
|
+
module Html
|
8
|
+
class Sanitizer
|
9
|
+
class << self
|
10
|
+
def full_sanitizer
|
11
|
+
Html::FullSanitizer
|
12
|
+
end
|
13
|
+
|
14
|
+
def link_sanitizer
|
15
|
+
Html::LinkSanitizer
|
16
|
+
end
|
17
|
+
|
18
|
+
def white_list_sanitizer
|
19
|
+
Html::WhiteListSanitizer
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
module ActionView
  module Helpers
    module SanitizeHelper
      # Writers that forward to +sanitizer_vendor.white_list_sanitizer+,
      # plus a set of deprecated options that only emit a warning.
      module ClassMethods
        # Replaces the allowed tags for the +sanitize+ helper.
        #
        #   class Application < Jets::Application
        #     config.action_view.sanitized_allowed_tags = 'table', 'tr', 'td'
        #   end
        #
        def sanitized_allowed_tags=(tags)
          white_list = sanitizer_vendor.white_list_sanitizer
          white_list.allowed_tags = tags
        end

        # Replaces the allowed HTML attributes for the +sanitize+ helper.
        #
        #   class Application < Jets::Application
        #     config.action_view.sanitized_allowed_attributes = ['onclick', 'longdesc']
        #   end
        #
        def sanitized_allowed_attributes=(attributes)
          white_list = sanitizer_vendor.white_list_sanitizer
          white_list.allowed_attributes = attributes
        end

        # For every option below, define a +sanitized_<option>+ reader and a
        # +sanitized_<option>=+ writer. Both ignore any value and only call
        # +deprecate_option+ with their own name.
        deprecated_options = [
          :protocol_separator,
          :uri_attributes,
          :bad_tags,
          :allowed_css_properties,
          :allowed_css_keywords,
          :shorthand_css_properties,
          :allowed_protocols
        ]

        deprecated_options.each do |option|
          reader_name = "sanitized_#{option}"
          writer_name = "#{reader_name}="

          define_method(reader_name) { deprecate_option(reader_name) }
          define_method(writer_name) { |_ignored| deprecate_option(writer_name) }
        end

        private

        # Emits a deprecation warning naming the option +name+.
        def deprecate_option(name)
          message = "The #{name} option is deprecated and has no effect. " \
                    "Until Jets 5 the old behavior can still be installed. " \
                    "To do this add the `jets-deprecated-sanitizer` to your Gemfile. " \
                    "Consult the Jets 4.2 upgrade guide for more information."
          ActiveSupport::Deprecation.warn message
        end
      end
    end
  end
end
|
@@ -0,0 +1,152 @@
|
|
1
|
+
module Jets
|
2
|
+
module Html
|
3
|
+
# XPath expressions passed to +remove_xpaths+: script elements, form
# elements and comment nodes. Frozen so the shared list cannot be
# mutated at runtime.
XPATHS_TO_REMOVE = %w{.//script .//form comment()}.freeze
|
4
|
+
|
5
|
+
class Sanitizer # :nodoc:
  # Abstract entry point. This base implementation always raises
  # NotImplementedError; subclasses supply the real behavior.
  def sanitize(html, options = {})
    raise NotImplementedError, "subclasses must implement sanitize method."
  end

  private

  # Removes every node matched by any of +xpaths+ (evaluated against
  # +node+) and returns +node+ itself.
  def remove_xpaths(node, xpaths)
    matched = node.xpath(*xpaths)
    matched.remove
    node
  end

  # Serializes +fragment+ with +options+, using +to_xml+ when the
  # fragment reports +xml?+ and +to_html+ otherwise.
  def properly_encode(fragment, options)
    if fragment.xml?
      fragment.to_xml(options)
    else
      fragment.to_html(options)
    end
  end
end
|
21
|
+
|
22
|
+
# === Jets::Html::FullSanitizer
|
23
|
+
# Removes all tags and strips out scripts, forms and comments.
|
24
|
+
#
|
25
|
+
# full_sanitizer = Jets::Html::FullSanitizer.new
|
26
|
+
# full_sanitizer.sanitize("<b>Bold</b> no more! <a href='more.html'>See more here</a>...")
|
27
|
+
# # => Bold no more! See more here...
|
28
|
+
class FullSanitizer < Sanitizer
  # Returns nil for a nil/false +html+ and returns an empty +html+ as is.
  # Otherwise parses +html+ as a Loofah fragment, removes the nodes
  # matched by XPATHS_TO_REMOVE, scrubs with TextOnlyScrubber and
  # serializes the result as UTF-8. +options+ is accepted but not read.
  def sanitize(html, options = {})
    return if !html
    return html if html.empty?

    # remove_xpaths hands back the same fragment it was given.
    fragment = remove_xpaths(Loofah.fragment(html), XPATHS_TO_REMOVE)
    fragment.scrub!(TextOnlyScrubber.new)

    properly_encode(fragment, encoding: 'UTF-8')
  end
end
|
41
|
+
|
42
|
+
# === Jets::Html::LinkSanitizer
|
43
|
+
# Removes +a+ tags and +href+ attributes, leaving only the link text.
|
44
|
+
#
|
45
|
+
# link_sanitizer = Jets::Html::LinkSanitizer.new
|
46
|
+
# link_sanitizer.sanitize('<a href="example.com">Only the link text will be kept.</a>')
|
47
|
+
# # => Only the link text will be kept.
|
48
|
+
class LinkSanitizer < Sanitizer
  # Builds the TargetScrubber used by every +sanitize+ call.
  def initialize
    @link_scrubber = TargetScrubber.new.tap do |scrubber|
      # NOTE(review): "href" is an attribute name rather than an element
      # name; it is kept in the tag list exactly as before. Confirm intent.
      scrubber.tags = %w(a href)
      scrubber.attributes = %w(href)
    end
  end

  # Scrubs +html+ as a Loofah fragment with the link scrubber and returns
  # the result as a String. +options+ is accepted but not read.
  def sanitize(html, options = {})
    scrubbed = Loofah.scrub_fragment(html, @link_scrubber)
    scrubbed.to_s
  end
end
|
59
|
+
|
60
|
+
# === Jets::Html::WhiteListSanitizer
|
61
|
+
# Sanitizes html and css from an extensive white list (see link further down).
|
62
|
+
#
|
63
|
+
# === Whitespace
|
64
|
+
# We can't make any guarantees about whitespace being kept or stripped.
|
65
|
+
# Loofah uses Nokogiri, which wraps either a C or Java parser for the
|
66
|
+
# respective Ruby implementation.
|
67
|
+
# Those two parsers determine how whitespace is ultimately handled.
|
68
|
+
#
|
69
|
+
# When the stripped markup is rendered, the user's browser won't take
|
70
|
+
# whitespace into account anyway. It might be better to suggest your users
|
71
|
+
# wrap their whitespace sensitive content in pre tags or that you do
|
72
|
+
# so automatically.
|
73
|
+
#
|
74
|
+
# === Options
|
75
|
+
# Sanitizes both html and css via the white lists found here:
|
76
|
+
# https://github.com/flavorjones/loofah/blob/master/lib/loofah/html5/whitelist.rb
|
77
|
+
#
|
78
|
+
# WhiteListSanitizer also accepts options to configure
|
79
|
+
# the white list used when sanitizing html.
|
80
|
+
# There's a class level option:
|
81
|
+
# Jets::Html::WhiteListSanitizer.allowed_tags = %w(table tr td)
|
82
|
+
# Jets::Html::WhiteListSanitizer.allowed_attributes = %w(id class style)
|
83
|
+
#
|
84
|
+
# Tags and attributes can also be passed to +sanitize+.
|
85
|
+
# Passed options take precedence over the class level options.
|
86
|
+
#
|
87
|
+
# === Examples
|
88
|
+
# white_list_sanitizer = Jets::Html::WhiteListSanitizer.new
|
89
|
+
#
|
90
|
+
# Sanitize css doesn't take options
|
91
|
+
# white_list_sanitizer.sanitize_css('background-color: #000;')
|
92
|
+
#
|
93
|
+
# Default: sanitize via an extensive white list of allowed elements
|
94
|
+
# white_list_sanitizer.sanitize(@article.body)
|
95
|
+
#
|
96
|
+
# White list via the supplied tags and attributes
|
97
|
+
# white_list_sanitizer.sanitize(@article.body, tags: %w(table tr td),
|
98
|
+
# attributes: %w(id class style))
|
99
|
+
#
|
100
|
+
# White list via a custom scrubber
|
101
|
+
# white_list_sanitizer.sanitize(@article.body, scrubber: ArticleScrubber.new)
|
102
|
+
class WhiteListSanitizer < Sanitizer
  # Class-level defaults, used when +sanitize+ is not given :tags or
  # :attributes.
  class << self
    attr_accessor :allowed_tags, :allowed_attributes
  end

  self.allowed_tags = Set.new(%w[
    strong em b i p code pre tt samp kbd var sub
    sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dl dt dd abbr
    acronym a img blockquote del ins
  ])

  self.allowed_attributes = Set.new(%w[
    href src width height alt cite datetime title class name xml:lang abbr
  ])

  # Creates the PermitScrubber that is reused by +sanitize+.
  def initialize
    @permit_scrubber = PermitScrubber.new
  end

  # Returns nil for a nil/false +html+ and returns an empty +html+ as is.
  # Otherwise scrubs a Loofah fragment of +html+ and serializes it as UTF-8.
  #
  # Recognized +options+:
  # [:scrubber]   scrubber applied instead of the built-in behavior
  # [:tags]       overrides the class-level allowed tags
  # [:attributes] overrides the class-level allowed attributes
  def sanitize(html, options = {})
    return if !html
    return html if html.empty?

    fragment = Loofah.fragment(html)
    custom_scrubber = options[:scrubber]

    if custom_scrubber
      # No duck typing, Loofah ensures subclass of Loofah::Scrubber
      fragment.scrub!(custom_scrubber)
    else
      tags = allowed_tags(options)
      attributes = allowed_attributes(options)

      if tags || attributes
        @permit_scrubber.tags = tags
        @permit_scrubber.attributes = attributes
        fragment.scrub!(@permit_scrubber)
      else
        # Neither list is available: fall back to Loofah's :strip scrubber.
        remove_xpaths(fragment, XPATHS_TO_REMOVE)
        fragment.scrub!(:strip)
      end
    end

    properly_encode(fragment, encoding: 'UTF-8')
  end

  # Delegates to Loofah::HTML5::Scrub.scrub_css with +style_string+.
  def sanitize_css(style_string)
    Loofah::HTML5::Scrub.scrub_css(style_string)
  end

  private

  # The :tags option if given, else the class-level allowed tags.
  def allowed_tags(options)
    per_call = options[:tags]
    per_call || self.class.allowed_tags
  end

  # The :attributes option if given, else the class-level allowed attributes.
  def allowed_attributes(options)
    per_call = options[:attributes]
    per_call || self.class.allowed_attributes
  end
end
|
151
|
+
end
|
152
|
+
end
|
@@ -0,0 +1,201 @@
|
|
1
|
+
module Jets
|
2
|
+
module Html
|
3
|
+
# === Jets::Html::PermitScrubber
|
4
|
+
#
|
5
|
+
# Jets::Html::PermitScrubber allows you to permit only your own tags and/or attributes.
|
6
|
+
#
|
7
|
+
# Jets::Html::PermitScrubber can be subclassed to determine:
|
8
|
+
# - When a node should be skipped via +skip_node?+.
|
9
|
+
# - When a node is allowed via +allowed_node?+.
|
10
|
+
# - When an attribute should be scrubbed via +scrub_attribute?+.
|
11
|
+
#
|
12
|
+
# Subclasses don't need to worry if tags or attributes are set or not.
|
13
|
+
# If tags or attributes are not set, Loofah's behavior will be used.
|
14
|
+
# If you override +allowed_node?+ and no tags are set, it will not be called.
|
15
|
+
# Instead, Loofah's behavior will be used.
|
16
|
+
# Likewise for +scrub_attribute?+ and attributes respectively.
|
17
|
+
#
|
18
|
+
# Text and CDATA nodes are skipped by default.
|
19
|
+
# Unallowed elements will be stripped, i.e. element is removed but its subtree kept.
|
20
|
+
# Supplied tags and attributes should be Enumerables.
|
21
|
+
#
|
22
|
+
# +tags=+
|
23
|
+
# If set, elements excluded will be stripped.
|
24
|
+
# If not, elements are stripped based on Loofah's +HTML5::Scrub.allowed_element?+.
|
25
|
+
#
|
26
|
+
# +attributes=+
|
27
|
+
# If set, attributes excluded will be removed.
|
28
|
+
# If not, attributes are removed based on Loofah's +HTML5::Scrub.scrub_attributes+.
|
29
|
+
#
|
30
|
+
# class CommentScrubber < Html::PermitScrubber
|
31
|
+
# def initialize
|
32
|
+
# super
|
33
|
+
# self.tags = %w(form script comment blockquote)
|
34
|
+
# end
|
35
|
+
#
|
36
|
+
# def skip_node?(node)
|
37
|
+
# node.text?
|
38
|
+
# end
|
39
|
+
#
|
40
|
+
# def scrub_attribute?(name)
|
41
|
+
# name == "style"
|
42
|
+
# end
|
43
|
+
# end
|
44
|
+
#
|
45
|
+
# See the documentation for Nokogiri::XML::Node to understand what's possible
|
46
|
+
# with nodes: http://nokogiri.org/Nokogiri/XML/Node.html
|
47
|
+
class PermitScrubber < Loofah::Scrubber
  attr_reader :tags, :attributes

  # Walks bottom-up and starts with no tag or attribute list configured.
  def initialize
    @direction = :bottom_up
    @tags = nil
    @attributes = nil
  end

  # Sets the tag list. Raises ArgumentError when +tags+ is truthy but
  # not an Enumerable.
  def tags=(tags)
    @tags = validate!(tags, :tags)
  end

  # Sets the attribute list. Raises ArgumentError when +attributes+ is
  # truthy but not an Enumerable.
  def attributes=(attributes)
    @attributes = validate!(attributes, :attributes)
  end

  # Scrubs a single node.
  #
  # CDATA nodes are replaced by a plain text node holding the same text.
  # Nodes for which +skip_node?+ is true are left alone. Nodes that are
  # not kept are passed to +scrub_node+. Unless that returns STOP, the
  # node's attributes are scrubbed afterwards.
  def scrub(node)
    if node.cdata?
      node.replace(node.document.create_text_node(node.text))
      return CONTINUE
    end

    return CONTINUE if skip_node?(node)
    return STOP if !keep_node?(node) && scrub_node(node) == STOP

    scrub_attributes(node)
  end

  protected

  # True when the node's name is in the configured tag list.
  # Only called when a tag list is set (see +keep_node?+).
  def allowed_node?(node)
    @tags.include?(node.name)
  end

  # Text nodes are skipped.
  def skip_node?(node)
    node.text?
  end

  # True when +name+ is absent from the configured attribute list.
  def scrub_attribute?(name)
    !@attributes.include?(name)
  end

  # Uses +allowed_node?+ when a tag list is set, and Loofah's
  # +allowed_element?+ check otherwise.
  def keep_node?(node)
    return allowed_node?(node) if @tags

    Loofah::HTML5::Scrub.allowed_element?(node.name)
  end

  # Strips the node: its children are inserted before it, then the node
  # itself is removed.
  def scrub_node(node)
    children = node.children
    node.before(children)
    node.remove
  end

  # Without an attribute list, delegates to Loofah's +scrub_attributes+.
  # Otherwise removes attributes flagged by +scrub_attribute?+, runs
  # +scrub_attribute+ on every attribute node, then scrubs the CSS.
  def scrub_attributes(node)
    return Loofah::HTML5::Scrub.scrub_attributes(node) unless @attributes

    node.attribute_nodes.each do |attribute_node|
      attribute_node.remove if scrub_attribute?(attribute_node.name)
      scrub_attribute(node, attribute_node)
    end

    scrub_css_attribute(node)
  end

  # Uses Loofah's +scrub_css_attribute+ when Loofah provides it.
  # Otherwise runs the style attribute's value (if any) through
  # +scrub_css+.
  def scrub_css_attribute(node)
    loofah_scrub = Loofah::HTML5::Scrub
    return loofah_scrub.scrub_css_attribute(node) if loofah_scrub.respond_to?(:scrub_css_attribute)

    style = node.attributes['style']
    style.value = loofah_scrub.scrub_css(style.value) if style
  end

  # Returns +var+ unchanged. Raises ArgumentError when it is truthy but
  # not an Enumerable.
  def validate!(var, name)
    invalid = var && !var.is_a?(Enumerable)
    raise ArgumentError, "You should pass :#{name} as an Enumerable" if invalid

    var
  end

  # Applies Loofah's white list rules to a single attribute node.
  def scrub_attribute(node, attr_node)
    white_list = Loofah::HTML5::WhiteList

    # Namespaced attributes are looked up as "prefix:name".
    namespace = attr_node.namespace
    attr_name = namespace ? "#{namespace.prefix}:#{attr_node.node_name}" : attr_node.node_name

    if white_list::ATTR_VAL_IS_URI.include?(attr_name)
      # this block lifted nearly verbatim from HTML5 sanitization
      val_unescaped = CGI.unescapeHTML(attr_node.value)
                         .gsub(Loofah::HTML5::Scrub::CONTROL_CHARACTERS, '')
                         .downcase
      if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/
        protocol = val_unescaped.split(white_list::PROTOCOL_SEPARATOR)[0]
        attr_node.remove unless white_list::ALLOWED_PROTOCOLS.include?(protocol)
      end
    end

    if white_list::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name) && attr_node.value
      # Blank out url(...) references whose target does not start with "#".
      attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, ' ')
    end

    if white_list::SVG_ALLOW_LOCAL_HREF.include?(node.name) &&
       attr_name == 'xlink:href' &&
       attr_node.value =~ /^\s*[^#\s].*/m
      attr_node.remove
    end

    # A src attribute with no non-whitespace character is dropped.
    if attr_name == 'src' && attr_node.value !~ /[^[:space:]]/
      node.remove_attribute(attr_node.name)
    end

    Loofah::HTML5::Scrub.force_correct_attribute_escaping! node
  end
end
|
160
|
+
|
161
|
+
# === Jets::Html::TargetScrubber
|
162
|
+
#
|
163
|
+
# Where Jets::Html::PermitScrubber picks out tags and attributes to permit in
|
164
|
+
# sanitization, Jets::Html::TargetScrubber targets them for removal.
|
165
|
+
#
|
166
|
+
# +tags=+
|
167
|
+
# If set, elements included will be stripped.
|
168
|
+
#
|
169
|
+
# +attributes=+
|
170
|
+
# If set, attributes included will be removed.
|
171
|
+
class TargetScrubber < PermitScrubber
  # Inverts the parent's answer, so a node counts as allowed exactly
  # when the parent implementation says it is not.
  def allowed_node?(node)
    parent_answer = super
    !parent_answer
  end

  # Inverts the parent's answer, so an attribute is scrubbed exactly
  # when the parent implementation says it should not be.
  def scrub_attribute?(name)
    parent_answer = super
    !parent_answer
  end
end
|
180
|
+
|
181
|
+
# === Jets::Html::TextOnlyScrubber
|
182
|
+
#
|
183
|
+
# Jets::Html::TextOnlyScrubber allows you to permit text nodes.
|
184
|
+
#
|
185
|
+
# Unallowed elements will be stripped, i.e. element is removed but its subtree kept.
|
186
|
+
class TextOnlyScrubber < Loofah::Scrubber
  # Walks the tree bottom-up.
  def initialize
    @direction = :bottom_up
  end

  # Text nodes are kept (CONTINUE). Any other node is replaced by its
  # children: they are inserted before the node, then the node is removed.
  def scrub(node)
    return CONTINUE if node.text?

    node.before(node.children)
    node.remove
  end
end
|
200
|
+
end
|
201
|
+
end
|