loofah 2.19.1 → 2.21.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +60 -0
- data/README.md +102 -107
- data/lib/loofah/concerns.rb +207 -0
- data/lib/loofah/elements.rb +78 -76
- data/lib/loofah/helpers.rb +21 -15
- data/lib/loofah/{html → html4}/document.rb +5 -7
- data/lib/loofah/html4/document_fragment.rb +15 -0
- data/lib/loofah/html5/document.rb +17 -0
- data/lib/loofah/html5/document_fragment.rb +15 -0
- data/lib/loofah/html5/libxml2_workarounds.rb +7 -6
- data/lib/loofah/html5/safelist.rb +937 -924
- data/lib/loofah/html5/scrub.rb +31 -31
- data/lib/loofah/metahelpers.rb +10 -6
- data/lib/loofah/scrubber.rb +10 -8
- data/lib/loofah/scrubbers.rb +52 -43
- data/lib/loofah/version.rb +2 -1
- data/lib/loofah/xml/document.rb +1 -0
- data/lib/loofah/xml/document_fragment.rb +2 -6
- data/lib/loofah.rb +116 -43
- metadata +17 -119
- data/lib/loofah/html/document_fragment.rb +0 -42
- data/lib/loofah/instance_methods.rb +0 -133
data/lib/loofah/html5/scrub.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
require "cgi"
|
3
4
|
require "crass"
|
4
5
|
|
@@ -6,9 +7,9 @@ module Loofah
|
|
6
7
|
module HTML5 # :nodoc:
|
7
8
|
module Scrub
|
8
9
|
CONTROL_CHARACTERS = /[`\u0000-\u0020\u007f\u0080-\u0101]/
|
9
|
-
CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(ch|cm|r?em|ex|in|lh|mm|pc|pt|px|Q|vmax|vmin|vw|vh|%|,|\))?)\z/
|
10
|
+
CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(ch|cm|r?em|ex|in|lh|mm|pc|pt|px|Q|vmax|vmin|vw|vh|%|,|\))?)\z/ # rubocop:disable Layout/LineLength
|
10
11
|
CRASS_SEMICOLON = { node: :semicolon, raw: ";" }
|
11
|
-
CSS_IMPORTANT =
|
12
|
+
CSS_IMPORTANT = "!important"
|
12
13
|
CSS_PROPERTY_STRING_WITHOUT_EMBEDDED_QUOTES = /\A(["'])?[^"']+\1\z/
|
13
14
|
DATA_ATTRIBUTE_NAME = /\Adata-[\w-]+\z/
|
14
15
|
|
@@ -26,7 +27,7 @@ module Loofah
|
|
26
27
|
attr_node.node_name
|
27
28
|
end
|
28
29
|
|
29
|
-
if attr_name
|
30
|
+
if DATA_ATTRIBUTE_NAME.match?(attr_name)
|
30
31
|
next
|
31
32
|
end
|
32
33
|
|
@@ -43,10 +44,12 @@ module Loofah
|
|
43
44
|
scrub_attribute_that_allows_local_ref(attr_node)
|
44
45
|
end
|
45
46
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
47
|
+
next unless SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) &&
|
48
|
+
attr_name == "xlink:href" &&
|
49
|
+
attr_node.value =~ /^\s*[^#\s].*/m
|
50
|
+
|
51
|
+
attr_node.remove
|
52
|
+
next
|
50
53
|
end
|
51
54
|
|
52
55
|
scrub_css_attribute(node)
|
@@ -66,29 +69,28 @@ module Loofah
|
|
66
69
|
end
|
67
70
|
|
68
71
|
def scrub_css(style)
|
72
|
+
url_flags = [:url, :bad_url]
|
69
73
|
style_tree = Crass.parse_properties(style)
|
70
74
|
sanitized_tree = []
|
71
75
|
|
72
76
|
style_tree.each do |node|
|
73
77
|
next unless node[:node] == :property
|
74
78
|
next if node[:children].any? do |child|
|
75
|
-
|
79
|
+
url_flags.include?(child[:node])
|
76
80
|
end
|
77
81
|
|
78
82
|
name = node[:name].downcase
|
79
83
|
next unless SafeList::ALLOWED_CSS_PROPERTIES.include?(name) ||
|
80
|
-
|
81
|
-
|
84
|
+
SafeList::ALLOWED_SVG_PROPERTIES.include?(name) ||
|
85
|
+
SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first)
|
82
86
|
|
83
87
|
value = node[:children].map do |child|
|
84
88
|
case child[:node]
|
85
89
|
when :whitespace
|
86
90
|
nil
|
87
91
|
when :string
|
88
|
-
if child[:raw]
|
92
|
+
if CSS_PROPERTY_STRING_WITHOUT_EMBEDDED_QUOTES.match?(child[:raw])
|
89
93
|
Crass::Parser.stringify(child)
|
90
|
-
else
|
91
|
-
nil
|
92
94
|
end
|
93
95
|
when :function
|
94
96
|
if SafeList::ALLOWED_CSS_FUNCTIONS.include?(child[:name].downcase)
|
@@ -97,8 +99,8 @@ module Loofah
|
|
97
99
|
when :ident
|
98
100
|
keyword = child[:value]
|
99
101
|
if !SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first) ||
|
100
|
-
|
101
|
-
|
102
|
+
SafeList::ALLOWED_CSS_KEYWORDS.include?(keyword) ||
|
103
|
+
(keyword =~ CSS_KEYWORDISH)
|
102
104
|
keyword
|
103
105
|
end
|
104
106
|
else
|
@@ -107,6 +109,7 @@ module Loofah
|
|
107
109
|
end.compact
|
108
110
|
|
109
111
|
next if value.empty?
|
112
|
+
|
110
113
|
value << CSS_IMPORTANT if node[:important]
|
111
114
|
propstring = format("%s:%s", name, value.join(" "))
|
112
115
|
sanitized_node = Crass.parse_properties(propstring).first
|
@@ -126,13 +129,9 @@ module Loofah
|
|
126
129
|
when :url
|
127
130
|
if node[:value].start_with?("#")
|
128
131
|
node[:raw]
|
129
|
-
else
|
130
|
-
nil
|
131
132
|
end
|
132
133
|
when :hash, :ident, :string
|
133
134
|
node[:raw]
|
134
|
-
else
|
135
|
-
nil
|
136
135
|
end
|
137
136
|
end.compact
|
138
137
|
|
@@ -142,7 +141,8 @@ module Loofah
|
|
142
141
|
def scrub_uri_attribute(attr_node)
|
143
142
|
# this block lifted nearly verbatim from HTML5 sanitization
|
144
143
|
val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, "").downcase
|
145
|
-
if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ &&
|
144
|
+
if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ &&
|
145
|
+
!SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
|
146
146
|
attr_node.remove
|
147
147
|
return true
|
148
148
|
elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == "data"
|
@@ -184,8 +184,8 @@ module Loofah
|
|
184
184
|
end
|
185
185
|
|
186
186
|
def cdata_needs_escaping?(node)
|
187
|
-
# Nokogiri's HTML4 parser on JRuby doesn't flag the child of a `style`
|
188
|
-
node.cdata? || (Nokogiri.jruby? && node.text? &&
|
187
|
+
# Nokogiri's HTML4 parser on JRuby doesn't flag the child of a `style` tag as cdata, but it acts that way
|
188
|
+
node.cdata? || (Nokogiri.jruby? && node.text? && node.parent.name == "style")
|
189
189
|
end
|
190
190
|
|
191
191
|
def cdata_escape(node)
|
@@ -198,28 +198,28 @@ module Loofah
|
|
198
198
|
end
|
199
199
|
|
200
200
|
TABLE_FOR_ESCAPE_HTML__ = {
|
201
|
-
|
202
|
-
|
203
|
-
|
201
|
+
"<" => "&lt;",
|
202
|
+
">" => "&gt;",
|
203
|
+
"&" => "&amp;",
|
204
204
|
}
|
205
205
|
|
206
206
|
def escape_tags(string)
|
207
207
|
# modified version of CGI.escapeHTML from ruby 3.1
|
208
208
|
enc = string.encoding
|
209
|
-
|
209
|
+
if enc.ascii_compatible?
|
210
|
+
string = string.b
|
211
|
+
string.gsub!(/[<>&]/, TABLE_FOR_ESCAPE_HTML__)
|
212
|
+
string.force_encoding(enc)
|
213
|
+
else
|
210
214
|
if enc.dummy?
|
211
215
|
origenc = enc
|
212
216
|
enc = Encoding::Converter.asciicompat_encoding(enc)
|
213
217
|
string = enc ? string.encode(enc) : string.b
|
214
218
|
end
|
215
|
-
table = Hash[TABLE_FOR_ESCAPE_HTML__.map {|pair|pair.map {|s|s.encode(enc)}}]
|
219
|
+
table = Hash[TABLE_FOR_ESCAPE_HTML__.map { |pair| pair.map { |s| s.encode(enc) } }]
|
216
220
|
string = string.gsub(/#{"[<>&]".encode(enc)}/, table)
|
217
221
|
string.encode!(origenc) if origenc
|
218
222
|
string
|
219
|
-
else
|
220
|
-
string = string.b
|
221
|
-
string.gsub!(/[<>&]/, TABLE_FOR_ESCAPE_HTML__)
|
222
|
-
string.force_encoding(enc)
|
223
223
|
end
|
224
224
|
end
|
225
225
|
end
|
data/lib/loofah/metahelpers.rb
CHANGED
@@ -1,12 +1,16 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Loofah
|
3
4
|
module MetaHelpers # :nodoc:
|
4
|
-
|
5
|
-
mojule
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
5
|
+
class << self
|
6
|
+
def add_downcased_set_members_to_all_set_constants(mojule)
|
7
|
+
mojule.constants.each do |constant_sym|
|
8
|
+
constant = mojule.const_get(constant_sym)
|
9
|
+
next unless Set === constant
|
10
|
+
|
11
|
+
constant.dup.each do |member|
|
12
|
+
constant.add(member.downcase)
|
13
|
+
end
|
10
14
|
end
|
11
15
|
end
|
12
16
|
end
|
data/lib/loofah/scrubber.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Loofah
|
3
4
|
#
|
4
5
|
# A RuntimeError raised when Loofah could not find an appropriate scrubber.
|
@@ -24,7 +25,7 @@ module Loofah
|
|
24
25
|
#
|
25
26
|
# This can then be run on a document:
|
26
27
|
#
|
27
|
-
# Loofah.
|
28
|
+
# Loofah.html5_fragment("<span>foo</span><p>bar</p>").scrub!(span2div).to_s
|
28
29
|
# # => "<div>foo</div><p>bar</p>"
|
29
30
|
#
|
30
31
|
# Scrubbers can be run on a document in either a top-down traversal (the
|
@@ -32,7 +33,6 @@ module Loofah
|
|
32
33
|
# Scrubber::STOP to terminate the traversal of a subtree.
|
33
34
|
#
|
34
35
|
class Scrubber
|
35
|
-
|
36
36
|
# Top-down Scrubbers may return CONTINUE to indicate that the subtree should be traversed.
|
37
37
|
CONTINUE = Object.new.freeze
|
38
38
|
|
@@ -67,7 +67,9 @@ module Loofah
|
|
67
67
|
unless [:top_down, :bottom_up].include?(direction)
|
68
68
|
raise ArgumentError, "direction #{direction} must be one of :top_down or :bottom_up"
|
69
69
|
end
|
70
|
-
|
70
|
+
|
71
|
+
@direction = direction
|
72
|
+
@block = block
|
71
73
|
end
|
72
74
|
|
73
75
|
#
|
@@ -84,7 +86,7 @@ module Loofah
|
|
84
86
|
# +scrub+, which will be called for each document node.
|
85
87
|
#
|
86
88
|
def scrub(node)
|
87
|
-
raise ScrubberNotFound, "No scrub method has been defined on #{self.class
|
89
|
+
raise ScrubberNotFound, "No scrub method has been defined on #{self.class}"
|
88
90
|
end
|
89
91
|
|
90
92
|
#
|
@@ -103,8 +105,8 @@ module Loofah
|
|
103
105
|
def html5lib_sanitize(node)
|
104
106
|
case node.type
|
105
107
|
when Nokogiri::XML::Node::ELEMENT_NODE
|
106
|
-
if HTML5::Scrub.allowed_element?
|
107
|
-
HTML5::Scrub.scrub_attributes
|
108
|
+
if HTML5::Scrub.allowed_element?(node.name)
|
109
|
+
HTML5::Scrub.scrub_attributes(node)
|
108
110
|
return Scrubber::CONTINUE
|
109
111
|
end
|
110
112
|
when Nokogiri::XML::Node::TEXT_NODE, Nokogiri::XML::Node::CDATA_SECTION_NODE
|
@@ -120,8 +122,8 @@ module Loofah
|
|
120
122
|
def traverse_conditionally_top_down(node)
|
121
123
|
if block
|
122
124
|
return if block.call(node) == STOP
|
123
|
-
|
124
|
-
return
|
125
|
+
elsif scrub(node) == STOP
|
126
|
+
return
|
125
127
|
end
|
126
128
|
node.children.each { |j| traverse_conditionally_top_down(j) }
|
127
129
|
end
|
data/lib/loofah/scrubbers.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Loofah
|
3
4
|
#
|
4
5
|
# Loofah provides some built-in scrubbers for sanitizing with
|
@@ -11,7 +12,7 @@ module Loofah
|
|
11
12
|
# +:strip+ removes unknown/unsafe tags, but leaves behind the pristine contents:
|
12
13
|
#
|
13
14
|
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
14
|
-
# Loofah.
|
15
|
+
# Loofah.html5_fragment(unsafe_html).scrub!(:strip)
|
15
16
|
# => "ohai! <div>div is safe</div> but foo is <b>not</b>"
|
16
17
|
#
|
17
18
|
#
|
@@ -20,7 +21,7 @@ module Loofah
|
|
20
21
|
# +:prune+ removes unknown/unsafe tags and their contents (including their subtrees):
|
21
22
|
#
|
22
23
|
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
23
|
-
# Loofah.
|
24
|
+
# Loofah.html5_fragment(unsafe_html).scrub!(:prune)
|
24
25
|
# => "ohai! <div>div is safe</div> "
|
25
26
|
#
|
26
27
|
#
|
@@ -29,7 +30,7 @@ module Loofah
|
|
29
30
|
# +:escape+ performs HTML entity escaping on the unknown/unsafe tags:
|
30
31
|
#
|
31
32
|
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
32
|
-
# Loofah.
|
33
|
+
# Loofah.html5_fragment(unsafe_html).scrub!(:escape)
|
33
34
|
# => "ohai! <div>div is safe</div> &lt;foo&gt;but foo is &lt;b&gt;not&lt;/b&gt;&lt;/foo&gt;"
|
34
35
|
#
|
35
36
|
#
|
@@ -41,7 +42,7 @@ module Loofah
|
|
41
42
|
# layer of paint on top of the HTML input to make it look nice.
|
42
43
|
#
|
43
44
|
# messy_markup = "ohai! <div id='foo' class='bar' style='margin: 10px'>div with attributes</div>"
|
44
|
-
# Loofah.
|
45
|
+
# Loofah.html5_fragment(messy_markup).scrub!(:whitewash)
|
45
46
|
# => "ohai! <div>div with attributes</div>"
|
46
47
|
#
|
47
48
|
# One use case for this scrubber is to clean up HTML that was
|
@@ -56,7 +57,7 @@ module Loofah
|
|
56
57
|
# +:nofollow+ adds a rel="nofollow" attribute to all links
|
57
58
|
#
|
58
59
|
# link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
|
59
|
-
# Loofah.
|
60
|
+
# Loofah.html5_fragment(link_farmers_markup).scrub!(:nofollow)
|
60
61
|
# => "ohai! <a href='http://www.myswarmysite.com/' rel="nofollow">I like your blog post</a>"
|
61
62
|
#
|
62
63
|
#
|
@@ -65,7 +66,7 @@ module Loofah
|
|
65
66
|
# +:noopener+ adds a rel="noopener" attribute to all links
|
66
67
|
#
|
67
68
|
# link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
|
68
|
-
# Loofah.
|
69
|
+
# Loofah.html5_fragment(link_farmers_markup).scrub!(:noopener)
|
69
70
|
# => "ohai! <a href='http://www.myswarmysite.com/' rel="noopener">I like your blog post</a>"
|
70
71
|
#
|
71
72
|
#
|
@@ -74,7 +75,7 @@ module Loofah
|
|
74
75
|
# +:unprintable+ removes unprintable Unicode characters.
|
75
76
|
#
|
76
77
|
# markup = "<p>Some text with an unprintable character at the end\u2028</p>"
|
77
|
-
# Loofah.
|
78
|
+
# Loofah.html5_fragment(markup).scrub!(:unprintable)
|
78
79
|
# => "<p>Some text with an unprintable character at the end</p>"
|
79
80
|
#
|
80
81
|
# You may not be able to see the unprintable character in the above example, but there is a
|
@@ -90,19 +91,20 @@ module Loofah
|
|
90
91
|
# +:strip+ removes unknown/unsafe tags, but leaves behind the pristine contents:
|
91
92
|
#
|
92
93
|
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
93
|
-
# Loofah.
|
94
|
+
# Loofah.html5_fragment(unsafe_html).scrub!(:strip)
|
94
95
|
# => "ohai! <div>div is safe</div> but foo is <b>not</b>"
|
95
96
|
#
|
96
97
|
class Strip < Scrubber
|
97
|
-
def initialize
|
98
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
98
99
|
@direction = :bottom_up
|
99
100
|
end
|
100
101
|
|
101
102
|
def scrub(node)
|
102
103
|
return CONTINUE if html5lib_sanitize(node) == CONTINUE
|
104
|
+
|
103
105
|
node.before(node.children)
|
104
106
|
node.remove
|
105
|
-
|
107
|
+
STOP
|
106
108
|
end
|
107
109
|
end
|
108
110
|
|
@@ -112,18 +114,19 @@ module Loofah
|
|
112
114
|
# +:prune+ removes unknown/unsafe tags and their contents (including their subtrees):
|
113
115
|
#
|
114
116
|
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
115
|
-
# Loofah.
|
117
|
+
# Loofah.html5_fragment(unsafe_html).scrub!(:prune)
|
116
118
|
# => "ohai! <div>div is safe</div> "
|
117
119
|
#
|
118
120
|
class Prune < Scrubber
|
119
|
-
def initialize
|
121
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
120
122
|
@direction = :top_down
|
121
123
|
end
|
122
124
|
|
123
125
|
def scrub(node)
|
124
126
|
return CONTINUE if html5lib_sanitize(node) == CONTINUE
|
127
|
+
|
125
128
|
node.remove
|
126
|
-
|
129
|
+
STOP
|
127
130
|
end
|
128
131
|
end
|
129
132
|
|
@@ -133,19 +136,20 @@ module Loofah
|
|
133
136
|
# +:escape+ performs HTML entity escaping on the unknown/unsafe tags:
|
134
137
|
#
|
135
138
|
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
136
|
-
# Loofah.
|
139
|
+
# Loofah.html5_fragment(unsafe_html).scrub!(:escape)
|
137
140
|
# => "ohai! <div>div is safe</div> &lt;foo&gt;but foo is &lt;b&gt;not&lt;/b&gt;&lt;/foo&gt;"
|
138
141
|
#
|
139
142
|
class Escape < Scrubber
|
140
|
-
def initialize
|
143
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
141
144
|
@direction = :top_down
|
142
145
|
end
|
143
146
|
|
144
147
|
def scrub(node)
|
145
148
|
return CONTINUE if html5lib_sanitize(node) == CONTINUE
|
146
|
-
|
149
|
+
|
150
|
+
node.add_next_sibling(Nokogiri::XML::Text.new(node.to_s, node.document))
|
147
151
|
node.remove
|
148
|
-
|
152
|
+
STOP
|
149
153
|
end
|
150
154
|
end
|
151
155
|
|
@@ -158,7 +162,7 @@ module Loofah
|
|
158
162
|
# layer of paint on top of the HTML input to make it look nice.
|
159
163
|
#
|
160
164
|
# messy_markup = "ohai! <div id='foo' class='bar' style='margin: 10px'>div with attributes</div>"
|
161
|
-
# Loofah.
|
165
|
+
# Loofah.html5_fragment(messy_markup).scrub!(:whitewash)
|
162
166
|
# => "ohai! <div>div with attributes</div>"
|
163
167
|
#
|
164
168
|
# One use case for this scrubber is to clean up HTML that was
|
@@ -168,14 +172,14 @@ module Loofah
|
|
168
172
|
# Certainly not me.
|
169
173
|
#
|
170
174
|
class Whitewash < Scrubber
|
171
|
-
def initialize
|
175
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
172
176
|
@direction = :top_down
|
173
177
|
end
|
174
178
|
|
175
179
|
def scrub(node)
|
176
180
|
case node.type
|
177
181
|
when Nokogiri::XML::Node::ELEMENT_NODE
|
178
|
-
if HTML5::Scrub.allowed_element?
|
182
|
+
if HTML5::Scrub.allowed_element?(node.name)
|
179
183
|
node.attributes.each { |attr| node.remove_attribute(attr.first) }
|
180
184
|
return CONTINUE if node.namespaces.empty?
|
181
185
|
end
|
@@ -193,18 +197,19 @@ module Loofah
|
|
193
197
|
# +:nofollow+ adds a rel="nofollow" attribute to all links
|
194
198
|
#
|
195
199
|
# link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
|
196
|
-
# Loofah.
|
200
|
+
# Loofah.html5_fragment(link_farmers_markup).scrub!(:nofollow)
|
197
201
|
# => "ohai! <a href='http://www.myswarmysite.com/' rel="nofollow">I like your blog post</a>"
|
198
202
|
#
|
199
203
|
class NoFollow < Scrubber
|
200
|
-
def initialize
|
204
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
201
205
|
@direction = :top_down
|
202
206
|
end
|
203
207
|
|
204
208
|
def scrub(node)
|
205
209
|
return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
|
210
|
+
|
206
211
|
append_attribute(node, "rel", "nofollow")
|
207
|
-
|
212
|
+
STOP
|
208
213
|
end
|
209
214
|
end
|
210
215
|
|
@@ -214,35 +219,37 @@ module Loofah
|
|
214
219
|
# +:noopener+ adds a rel="noopener" attribute to all links
|
215
220
|
#
|
216
221
|
# link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
|
217
|
-
# Loofah.
|
222
|
+
# Loofah.html5_fragment(link_farmers_markup).scrub!(:noopener)
|
218
223
|
# => "ohai! <a href='http://www.myswarmysite.com/' rel="noopener">I like your blog post</a>"
|
219
224
|
#
|
220
225
|
class NoOpener < Scrubber
|
221
|
-
def initialize
|
226
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
222
227
|
@direction = :top_down
|
223
228
|
end
|
224
229
|
|
225
230
|
def scrub(node)
|
226
231
|
return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
|
232
|
+
|
227
233
|
append_attribute(node, "rel", "noopener")
|
228
|
-
|
234
|
+
STOP
|
229
235
|
end
|
230
236
|
end
|
231
237
|
|
232
238
|
# This class probably isn't useful publicly, but is used for #to_text's current implemention
|
233
239
|
class NewlineBlockElements < Scrubber # :nodoc:
|
234
|
-
def initialize
|
240
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
235
241
|
@direction = :bottom_up
|
236
242
|
end
|
237
243
|
|
238
244
|
def scrub(node)
|
239
245
|
return CONTINUE unless Loofah::Elements::LINEBREAKERS.include?(node.name)
|
246
|
+
|
240
247
|
replacement = if Loofah::Elements::INLINE_LINE_BREAK.include?(node.name)
|
241
248
|
"\n"
|
242
249
|
else
|
243
250
|
"\n#{node.content}\n"
|
244
251
|
end
|
245
|
-
node.add_next_sibling
|
252
|
+
node.add_next_sibling(Nokogiri::XML::Text.new(replacement, node.document))
|
246
253
|
node.remove
|
247
254
|
end
|
248
255
|
end
|
@@ -253,7 +260,7 @@ module Loofah
|
|
253
260
|
# +:unprintable+ removes unprintable Unicode characters.
|
254
261
|
#
|
255
262
|
# markup = "<p>Some text with an unprintable character at the end\u2028</p>"
|
256
|
-
# Loofah.
|
263
|
+
# Loofah.html5_fragment(markup).scrub!(:unprintable)
|
257
264
|
# => "<p>Some text with an unprintable character at the end</p>"
|
258
265
|
#
|
259
266
|
# You may not be able to see the unprintable character in the above example, but there is a
|
@@ -263,7 +270,7 @@ module Loofah
|
|
263
270
|
# http://timelessrepo.com/json-isnt-a-javascript-subset
|
264
271
|
#
|
265
272
|
class Unprintable < Scrubber
|
266
|
-
def initialize
|
273
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
267
274
|
@direction = :top_down
|
268
275
|
end
|
269
276
|
|
@@ -279,21 +286,23 @@ module Loofah
|
|
279
286
|
# A hash that maps a symbol (like +:prune+) to the appropriate Scrubber (Loofah::Scrubbers::Prune).
|
280
287
|
#
|
281
288
|
MAP = {
|
282
|
-
:
|
283
|
-
:
|
284
|
-
:
|
285
|
-
:
|
286
|
-
:
|
287
|
-
:
|
288
|
-
:
|
289
|
-
:
|
289
|
+
escape: Escape,
|
290
|
+
prune: Prune,
|
291
|
+
whitewash: Whitewash,
|
292
|
+
strip: Strip,
|
293
|
+
nofollow: NoFollow,
|
294
|
+
noopener: NoOpener,
|
295
|
+
newline_block_elements: NewlineBlockElements,
|
296
|
+
unprintable: Unprintable,
|
290
297
|
}
|
291
298
|
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
299
|
+
class << self
|
300
|
+
#
|
301
|
+
# Returns an array of symbols representing the built-in scrubbers
|
302
|
+
#
|
303
|
+
def scrubber_symbols
|
304
|
+
MAP.keys
|
305
|
+
end
|
297
306
|
end
|
298
307
|
end
|
299
308
|
end
|
data/lib/loofah/version.rb
CHANGED
data/lib/loofah/xml/document.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Loofah
|
3
4
|
module XML # :nodoc:
|
4
5
|
#
|
@@ -8,15 +9,10 @@ module Loofah
|
|
8
9
|
#
|
9
10
|
class DocumentFragment < Nokogiri::XML::DocumentFragment
|
10
11
|
class << self
|
11
|
-
#
|
12
|
-
# Overridden Nokogiri::XML::DocumentFragment
|
13
|
-
# constructor. Applications should use Loofah.fragment to
|
14
|
-
# parse a fragment.
|
15
|
-
#
|
16
12
|
def parse(tags)
|
17
13
|
doc = Loofah::XML::Document.new
|
18
14
|
doc.encoding = tags.encoding.name if tags.respond_to?(:encoding)
|
19
|
-
|
15
|
+
new(doc, tags)
|
20
16
|
end
|
21
17
|
end
|
22
18
|
end
|