jets-html-sanitizer 1.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +7 -0
- data/MIT-LICENSE +48 -0
- data/README.md +142 -0
- data/lib/jets-html-sanitizer.rb +73 -0
- data/lib/jets/html/sanitizer.rb +152 -0
- data/lib/jets/html/sanitizer/version.rb +7 -0
- data/lib/jets/html/scrubbers.rb +201 -0
- data/test/sanitizer_test.rb +564 -0
- data/test/scrubbers_test.rb +174 -0
- metadata +131 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: e9ab1d5a11318b93b5438b666e96563d7bf6b631dbd5735950af5f601311f3b4
|
4
|
+
data.tar.gz: 766430d915e2406a730c721d82c92f7cc7982e99c2d05474f293c4f784ae20ee
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 7b9a8fcc55ca5f820637f470285f856f4bb6a7ec21c93ed3a768ad53278a1916696ce925542482fe3d91fcf331ff45b02c7e3e51a09e8a9b5f8e29fdfcecdff3
|
7
|
+
data.tar.gz: 42b089a1e9c5a7afb518554ef94dff8153e57a1f38f9ea97a04c5d947a39081de418e0a38817eb57a6c7b66616207db5ce977e8ed30ba6b71d93d02fe5eace36
|
data/CHANGELOG.md
ADDED
data/MIT-LICENSE
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
Copyright (c) 2019 Tung Nguyen
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
|
24
|
+
# Original Authors
|
25
|
+
|
26
|
+
Copyright (c) 2013-2015 Rafael Mendonça França, Kasper Timm Hansen
|
27
|
+
|
28
|
+
MIT License
|
29
|
+
|
30
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
31
|
+
a copy of this software and associated documentation files (the
|
32
|
+
"Software"), to deal in the Software without restriction, including
|
33
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
34
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
35
|
+
permit persons to whom the Software is furnished to do so, subject to
|
36
|
+
the following conditions:
|
37
|
+
|
38
|
+
The above copyright notice and this permission notice shall be
|
39
|
+
included in all copies or substantial portions of the Software.
|
40
|
+
|
41
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
42
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
43
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
44
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
45
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
46
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
47
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
48
|
+
|
data/README.md
ADDED
@@ -0,0 +1,142 @@
|
|
1
|
+
# Jets Html Sanitizers
|
2
|
+
|
3
|
+
This is a fork of rails-html-sanitizer. This is done so we can keep the namespace under `Jets` to avoid naming collisions with `Rails`. Credit for original work goes to the [rails-html-sanitizer](https://github.com/rails/rails-html-sanitizer) authors.
|
4
|
+
|
5
|
+
In Jets this gem will be responsible for sanitizing HTML fragments in Jets
|
6
|
+
applications, i.e. in the `sanitize`, `sanitize_css`, `strip_tags` and `strip_links` methods.
|
7
|
+
|
8
|
+
Jets Html Sanitizer is only intended to be used with Jets applications. If you need similar functionality in non Jets apps consider using [Loofah](https://github.com/flavorjones/loofah) directly (that's what handles sanitization under the hood).
|
9
|
+
|
10
|
+
## Installation
|
11
|
+
|
12
|
+
Add this line to your application's Gemfile:
|
13
|
+
|
14
|
+
gem 'jets-html-sanitizer'
|
15
|
+
|
16
|
+
And then execute:
|
17
|
+
|
18
|
+
$ bundle
|
19
|
+
|
20
|
+
Or install it yourself as:
|
21
|
+
|
22
|
+
$ gem install jets-html-sanitizer
|
23
|
+
|
24
|
+
## Usage
|
25
|
+
|
26
|
+
### Sanitizers
|
27
|
+
|
28
|
+
All sanitizers respond to `sanitize`.
|
29
|
+
|
30
|
+
#### FullSanitizer
|
31
|
+
|
32
|
+
```ruby
|
33
|
+
full_sanitizer = Jets::Html::FullSanitizer.new
|
34
|
+
full_sanitizer.sanitize("<b>Bold</b> no more! <a href='more.html'>See more here</a>...")
|
35
|
+
# => Bold no more! See more here...
|
36
|
+
```
|
37
|
+
|
38
|
+
#### LinkSanitizer
|
39
|
+
|
40
|
+
```ruby
|
41
|
+
link_sanitizer = Jets::Html::LinkSanitizer.new
|
42
|
+
link_sanitizer.sanitize('<a href="example.com">Only the link text will be kept.</a>')
|
43
|
+
# => Only the link text will be kept.
|
44
|
+
```
|
45
|
+
|
46
|
+
#### WhiteListSanitizer
|
47
|
+
|
48
|
+
```ruby
|
49
|
+
white_list_sanitizer = Jets::Html::WhiteListSanitizer.new
|
50
|
+
|
51
|
+
# sanitize via an extensive white list of allowed elements
|
52
|
+
white_list_sanitizer.sanitize(@article.body)
|
53
|
+
|
54
|
+
# white list only the supplied tags and attributes
|
55
|
+
white_list_sanitizer.sanitize(@article.body, tags: %w(table tr td), attributes: %w(id class style))
|
56
|
+
|
57
|
+
# white list via a custom scrubber
|
58
|
+
white_list_sanitizer.sanitize(@article.body, scrubber: ArticleScrubber.new)
|
59
|
+
|
60
|
+
# white list sanitizer can also sanitize css
|
61
|
+
white_list_sanitizer.sanitize_css('background-color: #000;')
|
62
|
+
```
|
63
|
+
|
64
|
+
### Scrubbers
|
65
|
+
|
66
|
+
Scrubbers are objects responsible for removing nodes or attributes you don't want in your HTML document.
|
67
|
+
|
68
|
+
This gem includes two scrubbers `Jets::Html::PermitScrubber` and `Jets::Html::TargetScrubber`.
|
69
|
+
|
70
|
+
#### `Jets::Html::PermitScrubber`
|
71
|
+
|
72
|
+
This scrubber allows you to permit only the tags and attributes you want.
|
73
|
+
|
74
|
+
```ruby
|
75
|
+
scrubber = Jets::Html::PermitScrubber.new
|
76
|
+
scrubber.tags = ['a']
|
77
|
+
|
78
|
+
html_fragment = Loofah.fragment('<a><img/ ></a>')
|
79
|
+
html_fragment.scrub!(scrubber)
|
80
|
+
html_fragment.to_s # => "<a></a>"
|
81
|
+
```
|
82
|
+
|
83
|
+
#### `Jets::Html::TargetScrubber`
|
84
|
+
|
85
|
+
Where `PermitScrubber` picks out tags and attributes to permit in sanitization,
|
86
|
+
`Jets::Html::TargetScrubber` targets them for removal.
|
87
|
+
|
88
|
+
|
89
|
+
```ruby
|
90
|
+
scrubber = Jets::Html::TargetScrubber.new
|
91
|
+
scrubber.tags = ['img']
|
92
|
+
|
93
|
+
html_fragment = Loofah.fragment('<a><img/ ></a>')
|
94
|
+
html_fragment.scrub!(scrubber)
|
95
|
+
html_fragment.to_s # => "<a></a>"
|
96
|
+
```
|
97
|
+
|
98
|
+
#### Custom Scrubbers
|
99
|
+
|
100
|
+
You can also create custom scrubbers in your application if you want to.
|
101
|
+
|
102
|
+
```ruby
|
103
|
+
class CommentScrubber < Jets::Html::PermitScrubber
|
104
|
+
def initialize
|
105
|
+
super
|
106
|
+
self.tags = %w( form script comment blockquote )
|
107
|
+
self.attributes = %w( style )
|
108
|
+
end
|
109
|
+
|
110
|
+
def skip_node?(node)
|
111
|
+
node.text?
|
112
|
+
end
|
113
|
+
end
|
114
|
+
```
|
115
|
+
|
116
|
+
See `Jets::Html::PermitScrubber` documentation to learn more about which methods can be overridden.
|
117
|
+
|
118
|
+
#### Custom Scrubber in a Jets app
|
119
|
+
|
120
|
+
Using the `CommentScrubber` from above, you can use this in a Jets view like so:
|
121
|
+
|
122
|
+
```ruby
|
123
|
+
<%= sanitize @comment, scrubber: CommentScrubber.new %>
|
124
|
+
```
|
125
|
+
|
126
|
+
## Read more
|
127
|
+
|
128
|
+
Loofah is what underlies the sanitizers and scrubbers of jets-html-sanitizer.
|
129
|
+
- [Loofah and Loofah Scrubbers](https://github.com/flavorjones/loofah)
|
130
|
+
|
131
|
+
The `node` argument passed to some methods in a custom scrubber is an instance of `Nokogiri::XML::Node`.
|
132
|
+
- [`Nokogiri::XML::Node`](http://nokogiri.org/Nokogiri/XML/Node.html)
|
133
|
+
- [Nokogiri](http://nokogiri.org)
|
134
|
+
|
135
|
+
## Contributing to Jets Html Sanitizers
|
136
|
+
|
137
|
+
Jets Html Sanitizers is the work of many contributors. You're encouraged to submit pull requests, propose features and discuss issues.
|
138
|
+
|
139
|
+
See [CONTRIBUTING](CONTRIBUTING.md).
|
140
|
+
|
141
|
+
## License
|
142
|
+
Jets Html Sanitizers is released under the [MIT License](MIT-LICENSE).
|
@@ -0,0 +1,73 @@
|
|
1
|
+
require "jets/html/sanitizer/version"
|
2
|
+
require "loofah"
|
3
|
+
require "jets/html/scrubbers"
|
4
|
+
require "jets/html/sanitizer"
|
5
|
+
|
6
|
+
module Jets
  module Html
    # Vendor entry point: hands out the sanitizer classes shipped with this
    # gem. Each accessor returns the class itself, not an instance.
    class Sanitizer
      # Sanitizer class that strips every tag.
      def self.full_sanitizer
        Html::FullSanitizer
      end

      # Sanitizer class that strips links.
      def self.link_sanitizer
        Html::LinkSanitizer
      end

      # Sanitizer class that scrubs against a white list.
      def self.white_list_sanitizer
        Html::WhiteListSanitizer
      end
    end
  end
end
|
25
|
+
|
26
|
+
module ActionView
  module Helpers
    module SanitizeHelper
      # Class-level configuration hooks for the +sanitize+ helper. The host
      # is expected to provide +sanitizer_vendor+.
      module ClassMethods
        # Replaces the allowed tags for the +sanitize+ helper.
        #
        #   class Application < Jets::Application
        #     config.action_view.sanitized_allowed_tags = 'table', 'tr', 'td'
        #   end
        #
        def sanitized_allowed_tags=(tags)
          sanitizer_vendor.white_list_sanitizer.allowed_tags = tags
        end

        # Replaces the allowed HTML attributes for the +sanitize+ helper.
        #
        #   class Application < Jets::Application
        #     config.action_view.sanitized_allowed_attributes = ['onclick', 'longdesc']
        #   end
        #
        def sanitized_allowed_attributes=(attributes)
          sanitizer_vendor.white_list_sanitizer.allowed_attributes = attributes
        end

        # Legacy options: a reader and a writer are generated for each one,
        # and both do nothing except emit a deprecation warning.
        %i[
          protocol_separator
          uri_attributes
          bad_tags
          allowed_css_properties
          allowed_css_keywords
          shorthand_css_properties
          allowed_protocols
        ].each do |option|
          reader = "sanitized_#{option}"
          writer = "#{reader}="

          define_method(reader) { deprecate_option(reader) }
          define_method(writer) { |_| deprecate_option(writer) }
        end

        private

        # Emits the deprecation warning for the legacy option +name+.
        def deprecate_option(name)
          message = "The #{name} option is deprecated " \
            "and has no effect. Until Jets 5 the old behavior can still be " \
            "installed. To do this add the `jets-deprecated-sanitizer` to " \
            "your Gemfile. Consult the Jets 4.2 upgrade guide for more information."
          ActiveSupport::Deprecation.warn message
        end
      end
    end
  end
end
|
@@ -0,0 +1,152 @@
|
|
1
|
+
module Jets
|
2
|
+
module Html
|
3
|
+
# XPath expressions selecting script elements, form elements and comment
# nodes. The sanitizers in this file pass this list to +remove_xpaths+ so the
# matched nodes are removed from the fragment.
XPATHS_TO_REMOVE = %w{.//script .//form comment()}
|
4
|
+
|
5
|
+
class Sanitizer # :nodoc:
  # Abstract entry point: always raises, concrete sanitizers override it.
  def sanitize(html, options = {})
    raise NotImplementedError, "subclasses must implement sanitize method."
  end

  private

  # Removes every node matched by +xpaths+ from +node+ and returns +node+.
  def remove_xpaths(node, xpaths)
    matches = node.xpath(*xpaths)
    matches.remove
    node
  end

  # Serializes +fragment+ with +options+, as XML when the fragment reports
  # itself as XML and as HTML otherwise.
  def properly_encode(fragment, options)
    if fragment.xml?
      fragment.to_xml(options)
    else
      fragment.to_html(options)
    end
  end
end
|
21
|
+
|
22
|
+
# === Jets::Html::FullSanitizer
|
23
|
+
# Removes all tags, keeping their text, but removes scripts, forms and comments entirely.
|
24
|
+
#
|
25
|
+
# full_sanitizer = Jets::Html::FullSanitizer.new
|
26
|
+
# full_sanitizer.sanitize("<b>Bold</b> no more! <a href='more.html'>See more here</a>...")
|
27
|
+
# # => Bold no more! See more here...
|
28
|
+
class FullSanitizer < Sanitizer
  # Reduces +html+ to plain text. Returns nil for a nil/false input and the
  # input itself when it is empty. +options+ is accepted but not used.
  def sanitize(html, options = {})
    return unless html
    return html if html.empty?

    # remove_xpaths hands back the fragment it was given, minus the nodes
    # matched by XPATHS_TO_REMOVE.
    stripped = remove_xpaths(Loofah.fragment(html), XPATHS_TO_REMOVE)
    stripped.scrub!(TextOnlyScrubber.new)

    properly_encode(stripped, encoding: 'UTF-8')
  end
end
|
41
|
+
|
42
|
+
# === Jets::Html::LinkSanitizer
|
43
|
+
# Removes +a+ tags and +href+ attributes, leaving only the link text.
|
44
|
+
#
|
45
|
+
# link_sanitizer = Jets::Html::LinkSanitizer.new
|
46
|
+
# link_sanitizer.sanitize('<a href="example.com">Only the link text will be kept.</a>')
|
47
|
+
# # => Only the link text will be kept.
|
48
|
+
class LinkSanitizer < Sanitizer
  # Builds the TargetScrubber used for every call to +sanitize+.
  def initialize
    @link_scrubber = TargetScrubber.new.tap do |scrubber|
      # NOTE(review): "href" is an attribute name, not an element name; as a
      # tag it only targets literal <href> elements. Confirm it is intended.
      scrubber.tags = %w(a href)
      scrubber.attributes = %w(href)
    end
  end

  # Scrubs +html+ with the link scrubber and returns the result as a String.
  # +options+ is accepted but not used.
  def sanitize(html, options = {})
    scrubbed = Loofah.scrub_fragment(html, @link_scrubber)
    scrubbed.to_s
  end
end
|
59
|
+
|
60
|
+
# === Jets::Html::WhiteListSanitizer
|
61
|
+
# Sanitizes html and css from an extensive white list (see link further down).
|
62
|
+
#
|
63
|
+
# === Whitespace
|
64
|
+
# We can't make any guarantees about whitespace being kept or stripped.
|
65
|
+
# Loofah uses Nokogiri, which wraps either a C or Java parser for the
|
66
|
+
# respective Ruby implementation.
|
67
|
+
# Those two parsers determine how whitespace is ultimately handled.
|
68
|
+
#
|
69
|
+
# When the stripped markup will be rendered the users browser won't take
|
70
|
+
# whitespace into account anyway. It might be better to suggest your users
|
71
|
+
# wrap their whitespace sensitive content in pre tags or that you do
|
72
|
+
# so automatically.
|
73
|
+
#
|
74
|
+
# === Options
|
75
|
+
# Sanitizes both html and css via the white lists found here:
|
76
|
+
# https://github.com/flavorjones/loofah/blob/master/lib/loofah/html5/whitelist.rb
|
77
|
+
#
|
78
|
+
# WhiteListSanitizer also accepts options to configure
|
79
|
+
# the white list used when sanitizing html.
|
80
|
+
# There's a class level option:
|
81
|
+
# Jets::Html::WhiteListSanitizer.allowed_tags = %w(table tr td)
|
82
|
+
# Jets::Html::WhiteListSanitizer.allowed_attributes = %w(id class style)
|
83
|
+
#
|
84
|
+
# Tags and attributes can also be passed to +sanitize+.
|
85
|
+
# Passed options take precedence over the class level options.
|
86
|
+
#
|
87
|
+
# === Examples
|
88
|
+
# white_list_sanitizer = Jets::Html::WhiteListSanitizer.new
|
89
|
+
#
|
90
|
+
# Sanitize css doesn't take options
|
91
|
+
# white_list_sanitizer.sanitize_css('background-color: #000;')
|
92
|
+
#
|
93
|
+
# Default: sanitize via an extensive white list of allowed elements
|
94
|
+
# white_list_sanitizer.sanitize(@article.body)
|
95
|
+
#
|
96
|
+
# White list via the supplied tags and attributes
|
97
|
+
# white_list_sanitizer.sanitize(@article.body, tags: %w(table tr td),
|
98
|
+
# attributes: %w(id class style))
|
99
|
+
#
|
100
|
+
# White list via a custom scrubber
|
101
|
+
# white_list_sanitizer.sanitize(@article.body, scrubber: ArticleScrubber.new)
|
102
|
+
class WhiteListSanitizer < Sanitizer
  class << self
    # Class-level defaults, used when +sanitize+ is called without :tags or
    # :attributes.
    attr_accessor :allowed_tags, :allowed_attributes
  end

  self.allowed_tags = Set.new(%w(
    strong em b i p code pre tt samp kbd var sub
    sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dl dt dd abbr
    acronym a img blockquote del ins
  ))
  self.allowed_attributes = Set.new(%w(
    href src width height alt cite datetime title class name xml:lang abbr
  ))

  def initialize
    @permit_scrubber = PermitScrubber.new
  end

  # Sanitizes +html+ and returns the serialized result. Returns nil for a
  # nil/false input and the input itself when it is empty.
  #
  # Precedence: options[:scrubber] if given; otherwise the permit scrubber
  # configured with the resolved tags/attributes; if both of those resolve
  # to nil, XPATHS_TO_REMOVE nodes are dropped and Loofah's :strip is used.
  def sanitize(html, options = {})
    return unless html
    return html if html.empty?

    fragment = Loofah.fragment(html)
    custom_scrubber = options[:scrubber]

    if custom_scrubber
      # No duck typing, Loofah ensures subclass of Loofah::Scrubber
      fragment.scrub!(custom_scrubber)
    else
      tags = allowed_tags(options)
      attributes = allowed_attributes(options)

      if tags || attributes
        @permit_scrubber.tags = tags
        @permit_scrubber.attributes = attributes
        fragment.scrub!(@permit_scrubber)
      else
        remove_xpaths(fragment, XPATHS_TO_REMOVE)
        fragment.scrub!(:strip)
      end
    end

    properly_encode(fragment, encoding: 'UTF-8')
  end

  # Delegates to Loofah's CSS scrubber.
  def sanitize_css(style_string)
    Loofah::HTML5::Scrub.scrub_css(style_string)
  end

  private

  # Per-call :tags win over the class-level default.
  def allowed_tags(options)
    options[:tags] || self.class.allowed_tags
  end

  # Per-call :attributes win over the class-level default.
  def allowed_attributes(options)
    options[:attributes] || self.class.allowed_attributes
  end
end
|
151
|
+
end
|
152
|
+
end
|
@@ -0,0 +1,201 @@
|
|
1
|
+
module Jets
|
2
|
+
module Html
|
3
|
+
# === Jets::Html::PermitScrubber
|
4
|
+
#
|
5
|
+
# Jets::Html::PermitScrubber allows you to permit only your own tags and/or attributes.
|
6
|
+
#
|
7
|
+
# Jets::Html::PermitScrubber can be subclassed to determine:
|
8
|
+
# - When a node should be skipped via +skip_node?+.
|
9
|
+
# - When a node is allowed via +allowed_node?+.
|
10
|
+
# - When an attribute should be scrubbed via +scrub_attribute?+.
|
11
|
+
#
|
12
|
+
# Subclasses don't need to worry if tags or attributes are set or not.
|
13
|
+
# If tags or attributes are not set, Loofah's behavior will be used.
|
14
|
+
# If you override +allowed_node?+ and no tags are set, it will not be called.
|
15
|
+
# Instead Loofahs behavior will be used.
|
16
|
+
# Likewise for +scrub_attribute?+ and attributes respectively.
|
17
|
+
#
|
18
|
+
# Text and CDATA nodes are skipped by default.
|
19
|
+
# Unallowed elements will be stripped, i.e. element is removed but its subtree kept.
|
20
|
+
# Supplied tags and attributes should be Enumerables.
|
21
|
+
#
|
22
|
+
# +tags=+
|
23
|
+
# If set, elements excluded will be stripped.
|
24
|
+
# If not, elements are stripped based on Loofahs +HTML5::Scrub.allowed_element?+.
|
25
|
+
#
|
26
|
+
# +attributes=+
|
27
|
+
# If set, attributes excluded will be removed.
|
28
|
+
# If not, attributes are removed based on Loofahs +HTML5::Scrub.scrub_attributes+.
|
29
|
+
#
|
30
|
+
# class CommentScrubber < Html::PermitScrubber
|
31
|
+
# def initialize
|
32
|
+
# super
|
33
|
+
# self.tags = %w(form script comment blockquote)
|
34
|
+
# end
|
35
|
+
#
|
36
|
+
# def skip_node?(node)
|
37
|
+
# node.text?
|
38
|
+
# end
|
39
|
+
#
|
40
|
+
# def scrub_attribute?(name)
|
41
|
+
# name == "style"
|
42
|
+
# end
|
43
|
+
# end
|
44
|
+
#
|
45
|
+
# See the documentation for Nokogiri::XML::Node to understand what's possible
|
46
|
+
# with nodes: http://nokogiri.org/Nokogiri/XML/Node.html
|
47
|
+
class PermitScrubber < Loofah::Scrubber
  attr_reader :tags, :attributes

  def initialize
    @direction = :bottom_up
    @tags, @attributes = nil, nil
  end

  # Sets the permitted tags. Raises ArgumentError unless +tags+ is nil/false
  # or an Enumerable.
  def tags=(tags)
    @tags = validate!(tags, :tags)
  end

  # Sets the permitted attributes. Raises ArgumentError unless +attributes+
  # is nil/false or an Enumerable.
  def attributes=(attributes)
    @attributes = validate!(attributes, :attributes)
  end

  # Scrubs a single node. CDATA nodes are replaced by plain text nodes,
  # skipped nodes are left alone, nodes that are not kept are stripped, and
  # the node's attributes are scrubbed last.
  def scrub(node)
    if node.cdata?
      text = node.document.create_text_node node.text
      node.replace text
      return CONTINUE
    end
    return CONTINUE if skip_node?(node)

    unless keep_node?(node)
      return STOP if scrub_node(node) == STOP
    end

    scrub_attributes(node)
  end

  protected

  # Only consulted when tags are set (see keep_node?).
  def allowed_node?(node)
    @tags.include?(node.name)
  end

  def skip_node?(node)
    node.text?
  end

  # Only consulted when attributes are set (see scrub_attributes).
  def scrub_attribute?(name)
    !@attributes.include?(name)
  end

  # Uses the configured tags when present, Loofah's element check otherwise.
  def keep_node?(node)
    if @tags
      allowed_node?(node)
    else
      Loofah::HTML5::Scrub.allowed_element?(node.name)
    end
  end

  # Strips the node: its children take its place, then the node is removed.
  def scrub_node(node)
    node.before(node.children) # strip
    node.remove
  end

  # With attributes configured, every attribute is either removed (not
  # permitted) or passed to scrub_attribute for value checks, then the style
  # attribute is scrubbed. Without them, Loofah's attribute scrubbing is used.
  def scrub_attributes(node)
    if @attributes
      node.attribute_nodes.each do |attr|
        if scrub_attribute?(attr.name)
          attr.remove
        else
          # Only attributes still attached to the node get value checks; a
          # removed attribute has nothing left to validate.
          scrub_attribute(node, attr)
        end
      end

      scrub_css_attribute(node)
    else
      Loofah::HTML5::Scrub.scrub_attributes(node)
    end
  end

  # Uses Loofah's scrub_css_attribute when the loaded Loofah provides it,
  # otherwise rewrites the style attribute through scrub_css.
  def scrub_css_attribute(node)
    if Loofah::HTML5::Scrub.respond_to?(:scrub_css_attribute)
      Loofah::HTML5::Scrub.scrub_css_attribute(node)
    else
      style = node.attributes['style']
      style.value = Loofah::HTML5::Scrub.scrub_css(style.value) if style
    end
  end

  # Returns +var+ unchanged, raising ArgumentError when it is truthy but not
  # an Enumerable.
  def validate!(var, name)
    if var && !var.is_a?(Enumerable)
      raise ArgumentError, "You should pass :#{name} as an Enumerable"
    end
    var
  end

  # Value-level checks for a single attribute: disallowed URI protocols,
  # url(...) references in SVG attributes, non-local xlink:href and blank src.
  def scrub_attribute(node, attr_node)
    attr_name = if attr_node.namespace
      "#{attr_node.namespace.prefix}:#{attr_node.node_name}"
    else
      attr_node.node_name
    end

    if Loofah::HTML5::WhiteList::ATTR_VAL_IS_URI.include?(attr_name)
      # this block lifted nearly verbatim from HTML5 sanitization
      val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(Loofah::HTML5::Scrub::CONTROL_CHARACTERS,'').downcase
      if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && ! Loofah::HTML5::WhiteList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(Loofah::HTML5::WhiteList::PROTOCOL_SEPARATOR)[0])
        attr_node.remove
      end
    end
    if Loofah::HTML5::WhiteList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
      attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, ' ') if attr_node.value
    end
    if Loofah::HTML5::WhiteList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == 'xlink:href' && attr_node.value =~ /^\s*[^#\s].*/m
      attr_node.remove
    end

    # A src attribute with a blank value is dropped.
    node.remove_attribute(attr_node.name) if attr_name == 'src' && attr_node.value !~ /[^[:space:]]/

    Loofah::HTML5::Scrub.force_correct_attribute_escaping! node
  end
end
|
160
|
+
|
161
|
+
# === Jets::Html::TargetScrubber
|
162
|
+
#
|
163
|
+
# Where Jets::Html::PermitScrubber picks out tags and attributes to permit in
|
164
|
+
# sanitization, Jets::Html::TargetScrubber targets them for removal.
|
165
|
+
#
|
166
|
+
# +tags=+
|
167
|
+
# If set, elements included will be stripped.
|
168
|
+
#
|
169
|
+
# +attributes=+
|
170
|
+
# If set, attributes included will be removed.
|
171
|
+
class TargetScrubber < PermitScrubber
  # Inverts PermitScrubber: a node is allowed when it is NOT in the tags.
  def allowed_node?(node)
    listed = super
    !listed
  end

  # Inverts PermitScrubber: an attribute is scrubbed when it IS in the
  # attributes.
  def scrub_attribute?(name)
    unlisted = super
    !unlisted
  end
end
|
180
|
+
|
181
|
+
# === Jets::Html::TextOnlyScrubber
|
182
|
+
#
|
183
|
+
# Jets::Html::TextOnlyScrubber allows you to permit text nodes.
|
184
|
+
#
|
185
|
+
# Unallowed elements will be stripped, i.e. element is removed but its subtree kept.
|
186
|
+
class TextOnlyScrubber < Loofah::Scrubber
  def initialize
    @direction = :bottom_up
  end

  # Text nodes are kept; any other node is replaced by its children.
  def scrub(node)
    return CONTINUE if node.text?

    node.before node.children
    node.remove
  end
end
|
200
|
+
end
|
201
|
+
end
|