loofah 2.20.0 → 2.21.0.rc1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +32 -0
- data/README.md +97 -106
- data/lib/loofah/concerns.rb +207 -0
- data/lib/loofah/elements.rb +78 -76
- data/lib/loofah/helpers.rb +21 -15
- data/lib/loofah/{html → html4}/document.rb +5 -7
- data/lib/loofah/html4/document_fragment.rb +15 -0
- data/lib/loofah/html5/document.rb +17 -0
- data/lib/loofah/html5/document_fragment.rb +15 -0
- data/lib/loofah/html5/libxml2_workarounds.rb +7 -6
- data/lib/loofah/html5/safelist.rb +937 -936
- data/lib/loofah/html5/scrub.rb +31 -31
- data/lib/loofah/metahelpers.rb +10 -6
- data/lib/loofah/scrubber.rb +10 -8
- data/lib/loofah/scrubbers.rb +52 -43
- data/lib/loofah/version.rb +2 -1
- data/lib/loofah/xml/document.rb +1 -0
- data/lib/loofah/xml/document_fragment.rb +2 -6
- data/lib/loofah.rb +119 -43
- metadata +15 -103
- data/lib/loofah/html/document_fragment.rb +0 -42
- data/lib/loofah/instance_methods.rb +0 -133
data/lib/loofah/html5/scrub.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
require "cgi"
|
3
4
|
require "crass"
|
4
5
|
|
@@ -6,9 +7,9 @@ module Loofah
|
|
6
7
|
module HTML5 # :nodoc:
|
7
8
|
module Scrub
|
8
9
|
CONTROL_CHARACTERS = /[`\u0000-\u0020\u007f\u0080-\u0101]/
|
9
|
-
CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(ch|cm|r?em|ex|in|lh|mm|pc|pt|px|Q|vmax|vmin|vw|vh|%|,|\))?)\z/
|
10
|
+
CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(ch|cm|r?em|ex|in|lh|mm|pc|pt|px|Q|vmax|vmin|vw|vh|%|,|\))?)\z/ # rubocop:disable Layout/LineLength
|
10
11
|
CRASS_SEMICOLON = { node: :semicolon, raw: ";" }
|
11
|
-
CSS_IMPORTANT =
|
12
|
+
CSS_IMPORTANT = "!important"
|
12
13
|
CSS_PROPERTY_STRING_WITHOUT_EMBEDDED_QUOTES = /\A(["'])?[^"']+\1\z/
|
13
14
|
DATA_ATTRIBUTE_NAME = /\Adata-[\w-]+\z/
|
14
15
|
|
@@ -26,7 +27,7 @@ module Loofah
|
|
26
27
|
attr_node.node_name
|
27
28
|
end
|
28
29
|
|
29
|
-
if attr_name
|
30
|
+
if DATA_ATTRIBUTE_NAME.match?(attr_name)
|
30
31
|
next
|
31
32
|
end
|
32
33
|
|
@@ -43,10 +44,12 @@ module Loofah
|
|
43
44
|
scrub_attribute_that_allows_local_ref(attr_node)
|
44
45
|
end
|
45
46
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
47
|
+
next unless SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) &&
|
48
|
+
attr_name == "xlink:href" &&
|
49
|
+
attr_node.value =~ /^\s*[^#\s].*/m
|
50
|
+
|
51
|
+
attr_node.remove
|
52
|
+
next
|
50
53
|
end
|
51
54
|
|
52
55
|
scrub_css_attribute(node)
|
@@ -66,29 +69,28 @@ module Loofah
|
|
66
69
|
end
|
67
70
|
|
68
71
|
def scrub_css(style)
|
72
|
+
url_flags = [:url, :bad_url]
|
69
73
|
style_tree = Crass.parse_properties(style)
|
70
74
|
sanitized_tree = []
|
71
75
|
|
72
76
|
style_tree.each do |node|
|
73
77
|
next unless node[:node] == :property
|
74
78
|
next if node[:children].any? do |child|
|
75
|
-
|
79
|
+
url_flags.include?(child[:node])
|
76
80
|
end
|
77
81
|
|
78
82
|
name = node[:name].downcase
|
79
83
|
next unless SafeList::ALLOWED_CSS_PROPERTIES.include?(name) ||
|
80
|
-
|
81
|
-
|
84
|
+
SafeList::ALLOWED_SVG_PROPERTIES.include?(name) ||
|
85
|
+
SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first)
|
82
86
|
|
83
87
|
value = node[:children].map do |child|
|
84
88
|
case child[:node]
|
85
89
|
when :whitespace
|
86
90
|
nil
|
87
91
|
when :string
|
88
|
-
if child[:raw]
|
92
|
+
if CSS_PROPERTY_STRING_WITHOUT_EMBEDDED_QUOTES.match?(child[:raw])
|
89
93
|
Crass::Parser.stringify(child)
|
90
|
-
else
|
91
|
-
nil
|
92
94
|
end
|
93
95
|
when :function
|
94
96
|
if SafeList::ALLOWED_CSS_FUNCTIONS.include?(child[:name].downcase)
|
@@ -97,8 +99,8 @@ module Loofah
|
|
97
99
|
when :ident
|
98
100
|
keyword = child[:value]
|
99
101
|
if !SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first) ||
|
100
|
-
|
101
|
-
|
102
|
+
SafeList::ALLOWED_CSS_KEYWORDS.include?(keyword) ||
|
103
|
+
(keyword =~ CSS_KEYWORDISH)
|
102
104
|
keyword
|
103
105
|
end
|
104
106
|
else
|
@@ -107,6 +109,7 @@ module Loofah
|
|
107
109
|
end.compact
|
108
110
|
|
109
111
|
next if value.empty?
|
112
|
+
|
110
113
|
value << CSS_IMPORTANT if node[:important]
|
111
114
|
propstring = format("%s:%s", name, value.join(" "))
|
112
115
|
sanitized_node = Crass.parse_properties(propstring).first
|
@@ -126,13 +129,9 @@ module Loofah
|
|
126
129
|
when :url
|
127
130
|
if node[:value].start_with?("#")
|
128
131
|
node[:raw]
|
129
|
-
else
|
130
|
-
nil
|
131
132
|
end
|
132
133
|
when :hash, :ident, :string
|
133
134
|
node[:raw]
|
134
|
-
else
|
135
|
-
nil
|
136
135
|
end
|
137
136
|
end.compact
|
138
137
|
|
@@ -142,7 +141,8 @@ module Loofah
|
|
142
141
|
def scrub_uri_attribute(attr_node)
|
143
142
|
# this block lifted nearly verbatim from HTML5 sanitization
|
144
143
|
val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, "").downcase
|
145
|
-
if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ &&
|
144
|
+
if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ &&
|
145
|
+
!SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
|
146
146
|
attr_node.remove
|
147
147
|
return true
|
148
148
|
elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == "data"
|
@@ -184,8 +184,8 @@ module Loofah
|
|
184
184
|
end
|
185
185
|
|
186
186
|
def cdata_needs_escaping?(node)
|
187
|
-
# Nokogiri's HTML4 parser on JRuby doesn't flag the child of a `style`
|
188
|
-
node.cdata? || (Nokogiri.jruby? && node.text? &&
|
187
|
+
# Nokogiri's HTML4 parser on JRuby doesn't flag the child of a `style` tag as cdata, but it acts that way
|
188
|
+
node.cdata? || (Nokogiri.jruby? && node.text? && node.parent.name == "style")
|
189
189
|
end
|
190
190
|
|
191
191
|
def cdata_escape(node)
|
@@ -198,28 +198,28 @@ module Loofah
|
|
198
198
|
end
|
199
199
|
|
200
200
|
TABLE_FOR_ESCAPE_HTML__ = {
|
201
|
-
|
202
|
-
|
203
|
-
|
201
|
+
"<" => "&lt;",
|
202
|
+
">" => "&gt;",
|
203
|
+
"&" => "&amp;",
|
204
204
|
}
|
205
205
|
|
206
206
|
def escape_tags(string)
|
207
207
|
# modified version of CGI.escapeHTML from ruby 3.1
|
208
208
|
enc = string.encoding
|
209
|
-
|
209
|
+
if enc.ascii_compatible?
|
210
|
+
string = string.b
|
211
|
+
string.gsub!(/[<>&]/, TABLE_FOR_ESCAPE_HTML__)
|
212
|
+
string.force_encoding(enc)
|
213
|
+
else
|
210
214
|
if enc.dummy?
|
211
215
|
origenc = enc
|
212
216
|
enc = Encoding::Converter.asciicompat_encoding(enc)
|
213
217
|
string = enc ? string.encode(enc) : string.b
|
214
218
|
end
|
215
|
-
table = Hash[TABLE_FOR_ESCAPE_HTML__.map {|pair|pair.map {|s|s.encode(enc)}}]
|
219
|
+
table = Hash[TABLE_FOR_ESCAPE_HTML__.map { |pair| pair.map { |s| s.encode(enc) } }]
|
216
220
|
string = string.gsub(/#{"[<>&]".encode(enc)}/, table)
|
217
221
|
string.encode!(origenc) if origenc
|
218
222
|
string
|
219
|
-
else
|
220
|
-
string = string.b
|
221
|
-
string.gsub!(/[<>&]/, TABLE_FOR_ESCAPE_HTML__)
|
222
|
-
string.force_encoding(enc)
|
223
223
|
end
|
224
224
|
end
|
225
225
|
end
|
data/lib/loofah/metahelpers.rb
CHANGED
@@ -1,12 +1,16 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Loofah
|
3
4
|
module MetaHelpers # :nodoc:
|
4
|
-
|
5
|
-
mojule
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
5
|
+
class << self
|
6
|
+
def add_downcased_set_members_to_all_set_constants(mojule)
|
7
|
+
mojule.constants.each do |constant_sym|
|
8
|
+
constant = mojule.const_get(constant_sym)
|
9
|
+
next unless Set === constant
|
10
|
+
|
11
|
+
constant.dup.each do |member|
|
12
|
+
constant.add(member.downcase)
|
13
|
+
end
|
10
14
|
end
|
11
15
|
end
|
12
16
|
end
|
data/lib/loofah/scrubber.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Loofah
|
3
4
|
#
|
4
5
|
# A RuntimeError raised when Loofah could not find an appropriate scrubber.
|
@@ -24,7 +25,7 @@ module Loofah
|
|
24
25
|
#
|
25
26
|
# This can then be run on a document:
|
26
27
|
#
|
27
|
-
# Loofah.
|
28
|
+
# Loofah.html5_fragment("<span>foo</span><p>bar</p>").scrub!(span2div).to_s
|
28
29
|
# # => "<div>foo</div><p>bar</p>"
|
29
30
|
#
|
30
31
|
# Scrubbers can be run on a document in either a top-down traversal (the
|
@@ -32,7 +33,6 @@ module Loofah
|
|
32
33
|
# Scrubber::STOP to terminate the traversal of a subtree.
|
33
34
|
#
|
34
35
|
class Scrubber
|
35
|
-
|
36
36
|
# Top-down Scrubbers may return CONTINUE to indicate that the subtree should be traversed.
|
37
37
|
CONTINUE = Object.new.freeze
|
38
38
|
|
@@ -67,7 +67,9 @@ module Loofah
|
|
67
67
|
unless [:top_down, :bottom_up].include?(direction)
|
68
68
|
raise ArgumentError, "direction #{direction} must be one of :top_down or :bottom_up"
|
69
69
|
end
|
70
|
-
|
70
|
+
|
71
|
+
@direction = direction
|
72
|
+
@block = block
|
71
73
|
end
|
72
74
|
|
73
75
|
#
|
@@ -84,7 +86,7 @@ module Loofah
|
|
84
86
|
# +scrub+, which will be called for each document node.
|
85
87
|
#
|
86
88
|
def scrub(node)
|
87
|
-
raise ScrubberNotFound, "No scrub method has been defined on #{self.class
|
89
|
+
raise ScrubberNotFound, "No scrub method has been defined on #{self.class}"
|
88
90
|
end
|
89
91
|
|
90
92
|
#
|
@@ -103,8 +105,8 @@ module Loofah
|
|
103
105
|
def html5lib_sanitize(node)
|
104
106
|
case node.type
|
105
107
|
when Nokogiri::XML::Node::ELEMENT_NODE
|
106
|
-
if HTML5::Scrub.allowed_element?
|
107
|
-
HTML5::Scrub.scrub_attributes
|
108
|
+
if HTML5::Scrub.allowed_element?(node.name)
|
109
|
+
HTML5::Scrub.scrub_attributes(node)
|
108
110
|
return Scrubber::CONTINUE
|
109
111
|
end
|
110
112
|
when Nokogiri::XML::Node::TEXT_NODE, Nokogiri::XML::Node::CDATA_SECTION_NODE
|
@@ -120,8 +122,8 @@ module Loofah
|
|
120
122
|
def traverse_conditionally_top_down(node)
|
121
123
|
if block
|
122
124
|
return if block.call(node) == STOP
|
123
|
-
|
124
|
-
return
|
125
|
+
elsif scrub(node) == STOP
|
126
|
+
return
|
125
127
|
end
|
126
128
|
node.children.each { |j| traverse_conditionally_top_down(j) }
|
127
129
|
end
|
data/lib/loofah/scrubbers.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Loofah
|
3
4
|
#
|
4
5
|
# Loofah provides some built-in scrubbers for sanitizing with
|
@@ -11,7 +12,7 @@ module Loofah
|
|
11
12
|
# +:strip+ removes unknown/unsafe tags, but leaves behind the pristine contents:
|
12
13
|
#
|
13
14
|
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
14
|
-
# Loofah.
|
15
|
+
# Loofah.html5_fragment(unsafe_html).scrub!(:strip)
|
15
16
|
# => "ohai! <div>div is safe</div> but foo is <b>not</b>"
|
16
17
|
#
|
17
18
|
#
|
@@ -20,7 +21,7 @@ module Loofah
|
|
20
21
|
# +:prune+ removes unknown/unsafe tags and their contents (including their subtrees):
|
21
22
|
#
|
22
23
|
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
23
|
-
# Loofah.
|
24
|
+
# Loofah.html5_fragment(unsafe_html).scrub!(:prune)
|
24
25
|
# => "ohai! <div>div is safe</div> "
|
25
26
|
#
|
26
27
|
#
|
@@ -29,7 +30,7 @@ module Loofah
|
|
29
30
|
# +:escape+ performs HTML entity escaping on the unknown/unsafe tags:
|
30
31
|
#
|
31
32
|
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
32
|
-
# Loofah.
|
33
|
+
# Loofah.html5_fragment(unsafe_html).scrub!(:escape)
|
33
34
|
# => "ohai! <div>div is safe</div> &lt;foo&gt;but foo is &lt;b&gt;not&lt;/b&gt;&lt;/foo&gt;"
|
34
35
|
#
|
35
36
|
#
|
@@ -41,7 +42,7 @@ module Loofah
|
|
41
42
|
# layer of paint on top of the HTML input to make it look nice.
|
42
43
|
#
|
43
44
|
# messy_markup = "ohai! <div id='foo' class='bar' style='margin: 10px'>div with attributes</div>"
|
44
|
-
# Loofah.
|
45
|
+
# Loofah.html5_fragment(messy_markup).scrub!(:whitewash)
|
45
46
|
# => "ohai! <div>div with attributes</div>"
|
46
47
|
#
|
47
48
|
# One use case for this scrubber is to clean up HTML that was
|
@@ -56,7 +57,7 @@ module Loofah
|
|
56
57
|
# +:nofollow+ adds a rel="nofollow" attribute to all links
|
57
58
|
#
|
58
59
|
# link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
|
59
|
-
# Loofah.
|
60
|
+
# Loofah.html5_fragment(link_farmers_markup).scrub!(:nofollow)
|
60
61
|
# => "ohai! <a href='http://www.myswarmysite.com/' rel="nofollow">I like your blog post</a>"
|
61
62
|
#
|
62
63
|
#
|
@@ -65,7 +66,7 @@ module Loofah
|
|
65
66
|
# +:noopener+ adds a rel="noopener" attribute to all links
|
66
67
|
#
|
67
68
|
# link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
|
68
|
-
# Loofah.
|
69
|
+
# Loofah.html5_fragment(link_farmers_markup).scrub!(:noopener)
|
69
70
|
# => "ohai! <a href='http://www.myswarmysite.com/' rel="noopener">I like your blog post</a>"
|
70
71
|
#
|
71
72
|
#
|
@@ -74,7 +75,7 @@ module Loofah
|
|
74
75
|
# +:unprintable+ removes unprintable Unicode characters.
|
75
76
|
#
|
76
77
|
# markup = "<p>Some text with an unprintable character at the end\u2028</p>"
|
77
|
-
# Loofah.
|
78
|
+
# Loofah.html5_fragment(markup).scrub!(:unprintable)
|
78
79
|
# => "<p>Some text with an unprintable character at the end</p>"
|
79
80
|
#
|
80
81
|
# You may not be able to see the unprintable character in the above example, but there is a
|
@@ -90,19 +91,20 @@ module Loofah
|
|
90
91
|
# +:strip+ removes unknown/unsafe tags, but leaves behind the pristine contents:
|
91
92
|
#
|
92
93
|
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
93
|
-
# Loofah.
|
94
|
+
# Loofah.html5_fragment(unsafe_html).scrub!(:strip)
|
94
95
|
# => "ohai! <div>div is safe</div> but foo is <b>not</b>"
|
95
96
|
#
|
96
97
|
class Strip < Scrubber
|
97
|
-
def initialize
|
98
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
98
99
|
@direction = :bottom_up
|
99
100
|
end
|
100
101
|
|
101
102
|
def scrub(node)
|
102
103
|
return CONTINUE if html5lib_sanitize(node) == CONTINUE
|
104
|
+
|
103
105
|
node.before(node.children)
|
104
106
|
node.remove
|
105
|
-
|
107
|
+
STOP
|
106
108
|
end
|
107
109
|
end
|
108
110
|
|
@@ -112,18 +114,19 @@ module Loofah
|
|
112
114
|
# +:prune+ removes unknown/unsafe tags and their contents (including their subtrees):
|
113
115
|
#
|
114
116
|
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
115
|
-
# Loofah.
|
117
|
+
# Loofah.html5_fragment(unsafe_html).scrub!(:prune)
|
116
118
|
# => "ohai! <div>div is safe</div> "
|
117
119
|
#
|
118
120
|
class Prune < Scrubber
|
119
|
-
def initialize
|
121
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
120
122
|
@direction = :top_down
|
121
123
|
end
|
122
124
|
|
123
125
|
def scrub(node)
|
124
126
|
return CONTINUE if html5lib_sanitize(node) == CONTINUE
|
127
|
+
|
125
128
|
node.remove
|
126
|
-
|
129
|
+
STOP
|
127
130
|
end
|
128
131
|
end
|
129
132
|
|
@@ -133,19 +136,20 @@ module Loofah
|
|
133
136
|
# +:escape+ performs HTML entity escaping on the unknown/unsafe tags:
|
134
137
|
#
|
135
138
|
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
136
|
-
# Loofah.
|
139
|
+
# Loofah.html5_fragment(unsafe_html).scrub!(:escape)
|
137
140
|
# => "ohai! <div>div is safe</div> &lt;foo&gt;but foo is &lt;b&gt;not&lt;/b&gt;&lt;/foo&gt;"
|
138
141
|
#
|
139
142
|
class Escape < Scrubber
|
140
|
-
def initialize
|
143
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
141
144
|
@direction = :top_down
|
142
145
|
end
|
143
146
|
|
144
147
|
def scrub(node)
|
145
148
|
return CONTINUE if html5lib_sanitize(node) == CONTINUE
|
146
|
-
|
149
|
+
|
150
|
+
node.add_next_sibling(Nokogiri::XML::Text.new(node.to_s, node.document))
|
147
151
|
node.remove
|
148
|
-
|
152
|
+
STOP
|
149
153
|
end
|
150
154
|
end
|
151
155
|
|
@@ -158,7 +162,7 @@ module Loofah
|
|
158
162
|
# layer of paint on top of the HTML input to make it look nice.
|
159
163
|
#
|
160
164
|
# messy_markup = "ohai! <div id='foo' class='bar' style='margin: 10px'>div with attributes</div>"
|
161
|
-
# Loofah.
|
165
|
+
# Loofah.html5_fragment(messy_markup).scrub!(:whitewash)
|
162
166
|
# => "ohai! <div>div with attributes</div>"
|
163
167
|
#
|
164
168
|
# One use case for this scrubber is to clean up HTML that was
|
@@ -168,14 +172,14 @@ module Loofah
|
|
168
172
|
# Certainly not me.
|
169
173
|
#
|
170
174
|
class Whitewash < Scrubber
|
171
|
-
def initialize
|
175
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
172
176
|
@direction = :top_down
|
173
177
|
end
|
174
178
|
|
175
179
|
def scrub(node)
|
176
180
|
case node.type
|
177
181
|
when Nokogiri::XML::Node::ELEMENT_NODE
|
178
|
-
if HTML5::Scrub.allowed_element?
|
182
|
+
if HTML5::Scrub.allowed_element?(node.name)
|
179
183
|
node.attributes.each { |attr| node.remove_attribute(attr.first) }
|
180
184
|
return CONTINUE if node.namespaces.empty?
|
181
185
|
end
|
@@ -193,18 +197,19 @@ module Loofah
|
|
193
197
|
# +:nofollow+ adds a rel="nofollow" attribute to all links
|
194
198
|
#
|
195
199
|
# link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
|
196
|
-
# Loofah.
|
200
|
+
# Loofah.html5_fragment(link_farmers_markup).scrub!(:nofollow)
|
197
201
|
# => "ohai! <a href='http://www.myswarmysite.com/' rel="nofollow">I like your blog post</a>"
|
198
202
|
#
|
199
203
|
class NoFollow < Scrubber
|
200
|
-
def initialize
|
204
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
201
205
|
@direction = :top_down
|
202
206
|
end
|
203
207
|
|
204
208
|
def scrub(node)
|
205
209
|
return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
|
210
|
+
|
206
211
|
append_attribute(node, "rel", "nofollow")
|
207
|
-
|
212
|
+
STOP
|
208
213
|
end
|
209
214
|
end
|
210
215
|
|
@@ -214,35 +219,37 @@ module Loofah
|
|
214
219
|
# +:noopener+ adds a rel="noopener" attribute to all links
|
215
220
|
#
|
216
221
|
# link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
|
217
|
-
# Loofah.
|
222
|
+
# Loofah.html5_fragment(link_farmers_markup).scrub!(:noopener)
|
218
223
|
# => "ohai! <a href='http://www.myswarmysite.com/' rel="noopener">I like your blog post</a>"
|
219
224
|
#
|
220
225
|
class NoOpener < Scrubber
|
221
|
-
def initialize
|
226
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
222
227
|
@direction = :top_down
|
223
228
|
end
|
224
229
|
|
225
230
|
def scrub(node)
|
226
231
|
return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
|
232
|
+
|
227
233
|
append_attribute(node, "rel", "noopener")
|
228
|
-
|
234
|
+
STOP
|
229
235
|
end
|
230
236
|
end
|
231
237
|
|
232
238
|
# This class probably isn't useful publicly, but is used for #to_text's current implemention
|
233
239
|
class NewlineBlockElements < Scrubber # :nodoc:
|
234
|
-
def initialize
|
240
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
235
241
|
@direction = :bottom_up
|
236
242
|
end
|
237
243
|
|
238
244
|
def scrub(node)
|
239
245
|
return CONTINUE unless Loofah::Elements::LINEBREAKERS.include?(node.name)
|
246
|
+
|
240
247
|
replacement = if Loofah::Elements::INLINE_LINE_BREAK.include?(node.name)
|
241
248
|
"\n"
|
242
249
|
else
|
243
250
|
"\n#{node.content}\n"
|
244
251
|
end
|
245
|
-
node.add_next_sibling
|
252
|
+
node.add_next_sibling(Nokogiri::XML::Text.new(replacement, node.document))
|
246
253
|
node.remove
|
247
254
|
end
|
248
255
|
end
|
@@ -253,7 +260,7 @@ module Loofah
|
|
253
260
|
# +:unprintable+ removes unprintable Unicode characters.
|
254
261
|
#
|
255
262
|
# markup = "<p>Some text with an unprintable character at the end\u2028</p>"
|
256
|
-
# Loofah.
|
263
|
+
# Loofah.html5_fragment(markup).scrub!(:unprintable)
|
257
264
|
# => "<p>Some text with an unprintable character at the end</p>"
|
258
265
|
#
|
259
266
|
# You may not be able to see the unprintable character in the above example, but there is a
|
@@ -263,7 +270,7 @@ module Loofah
|
|
263
270
|
# http://timelessrepo.com/json-isnt-a-javascript-subset
|
264
271
|
#
|
265
272
|
class Unprintable < Scrubber
|
266
|
-
def initialize
|
273
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
267
274
|
@direction = :top_down
|
268
275
|
end
|
269
276
|
|
@@ -279,21 +286,23 @@ module Loofah
|
|
279
286
|
# A hash that maps a symbol (like +:prune+) to the appropriate Scrubber (Loofah::Scrubbers::Prune).
|
280
287
|
#
|
281
288
|
MAP = {
|
282
|
-
:
|
283
|
-
:
|
284
|
-
:
|
285
|
-
:
|
286
|
-
:
|
287
|
-
:
|
288
|
-
:
|
289
|
-
:
|
289
|
+
escape: Escape,
|
290
|
+
prune: Prune,
|
291
|
+
whitewash: Whitewash,
|
292
|
+
strip: Strip,
|
293
|
+
nofollow: NoFollow,
|
294
|
+
noopener: NoOpener,
|
295
|
+
newline_block_elements: NewlineBlockElements,
|
296
|
+
unprintable: Unprintable,
|
290
297
|
}
|
291
298
|
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
299
|
+
class << self
|
300
|
+
#
|
301
|
+
# Returns an array of symbols representing the built-in scrubbers
|
302
|
+
#
|
303
|
+
def scrubber_symbols
|
304
|
+
MAP.keys
|
305
|
+
end
|
297
306
|
end
|
298
307
|
end
|
299
308
|
end
|
data/lib/loofah/version.rb
CHANGED
data/lib/loofah/xml/document.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Loofah
|
3
4
|
module XML # :nodoc:
|
4
5
|
#
|
@@ -8,15 +9,10 @@ module Loofah
|
|
8
9
|
#
|
9
10
|
class DocumentFragment < Nokogiri::XML::DocumentFragment
|
10
11
|
class << self
|
11
|
-
#
|
12
|
-
# Overridden Nokogiri::XML::DocumentFragment
|
13
|
-
# constructor. Applications should use Loofah.fragment to
|
14
|
-
# parse a fragment.
|
15
|
-
#
|
16
12
|
def parse(tags)
|
17
13
|
doc = Loofah::XML::Document.new
|
18
14
|
doc.encoding = tags.encoding.name if tags.respond_to?(:encoding)
|
19
|
-
|
15
|
+
new(doc, tags)
|
20
16
|
end
|
21
17
|
end
|
22
18
|
end
|