loofah 0.4.2 → 2.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +604 -0
- data/MIT-LICENSE.txt +3 -1
- data/README.md +410 -0
- data/SECURITY.md +18 -0
- data/lib/loofah/concerns.rb +207 -0
- data/lib/loofah/elements.rb +98 -0
- data/lib/loofah/helpers.rb +91 -4
- data/lib/loofah/html4/document.rb +17 -0
- data/lib/loofah/html4/document_fragment.rb +15 -0
- data/lib/loofah/html5/document.rb +17 -0
- data/lib/loofah/html5/document_fragment.rb +15 -0
- data/lib/loofah/html5/libxml2_workarounds.rb +28 -0
- data/lib/loofah/html5/safelist.rb +1058 -0
- data/lib/loofah/html5/scrub.rb +211 -40
- data/lib/loofah/metahelpers.rb +18 -0
- data/lib/loofah/scrubber.rb +31 -13
- data/lib/loofah/scrubbers.rb +262 -31
- data/lib/loofah/version.rb +6 -0
- data/lib/loofah/xml/document.rb +2 -0
- data/lib/loofah/xml/document_fragment.rb +6 -9
- data/lib/loofah.rb +131 -52
- metadata +79 -158
- data/CHANGELOG.rdoc +0 -92
- data/DEPRECATED.rdoc +0 -12
- data/Manifest.txt +0 -34
- data/README.rdoc +0 -330
- data/Rakefile +0 -61
- data/TODO.rdoc +0 -4
- data/benchmark/benchmark.rb +0 -149
- data/benchmark/fragment.html +0 -96
- data/benchmark/helper.rb +0 -73
- data/benchmark/www.slashdot.com.html +0 -2560
- data/init.rb +0 -1
- data/lib/loofah/active_record.rb +0 -62
- data/lib/loofah/html/document.rb +0 -22
- data/lib/loofah/html/document_fragment.rb +0 -46
- data/lib/loofah/html5/whitelist.rb +0 -174
- data/lib/loofah/instance_methods.rb +0 -77
- data/lib/loofah/xss_foliate.rb +0 -212
- data/test/helper.rb +0 -8
- data/test/html5/test_sanitizer.rb +0 -248
- data/test/test_active_record.rb +0 -146
- data/test/test_ad_hoc.rb +0 -272
- data/test/test_api.rb +0 -128
- data/test/test_helpers.rb +0 -28
- data/test/test_scrubber.rb +0 -227
- data/test/test_scrubbers.rb +0 -144
- data/test/test_xss_foliate.rb +0 -171
- data.tar.gz.sig +0 -0
- metadata.gz.sig +0 -2
data/lib/loofah/scrubbers.rb
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module Loofah
|
|
2
4
|
#
|
|
3
5
|
# Loofah provides some built-in scrubbers for sanitizing with
|
|
4
|
-
# HTML5lib's
|
|
6
|
+
# HTML5lib's safelist and for accomplishing some common
|
|
5
7
|
# transformation tasks.
|
|
6
8
|
#
|
|
7
9
|
#
|
|
@@ -10,7 +12,7 @@ module Loofah
|
|
|
10
12
|
# +:strip+ removes unknown/unsafe tags, but leaves behind the pristine contents:
|
|
11
13
|
#
|
|
12
14
|
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
|
13
|
-
# Loofah.
|
|
15
|
+
# Loofah.html5_fragment(unsafe_html).scrub!(:strip)
|
|
14
16
|
# => "ohai! <div>div is safe</div> but foo is <b>not</b>"
|
|
15
17
|
#
|
|
16
18
|
#
|
|
@@ -19,7 +21,7 @@ module Loofah
|
|
|
19
21
|
# +:prune+ removes unknown/unsafe tags and their contents (including their subtrees):
|
|
20
22
|
#
|
|
21
23
|
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
|
22
|
-
# Loofah.
|
|
24
|
+
# Loofah.html5_fragment(unsafe_html).scrub!(:prune)
|
|
23
25
|
# => "ohai! <div>div is safe</div> "
|
|
24
26
|
#
|
|
25
27
|
#
|
|
@@ -28,7 +30,7 @@ module Loofah
|
|
|
28
30
|
# +:escape+ performs HTML entity escaping on the unknown/unsafe tags:
|
|
29
31
|
#
|
|
30
32
|
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
|
31
|
-
# Loofah.
|
|
33
|
+
# Loofah.html5_fragment(unsafe_html).scrub!(:escape)
|
|
32
34
|
# => "ohai! <div>div is safe</div> &lt;foo&gt;but foo is &lt;b&gt;not&lt;/b&gt;&lt;/foo&gt;"
|
|
33
35
|
#
|
|
34
36
|
#
|
|
@@ -40,7 +42,7 @@ module Loofah
|
|
|
40
42
|
# layer of paint on top of the HTML input to make it look nice.
|
|
41
43
|
#
|
|
42
44
|
# messy_markup = "ohai! <div id='foo' class='bar' style='margin: 10px'>div with attributes</div>"
|
|
43
|
-
# Loofah.
|
|
45
|
+
# Loofah.html5_fragment(messy_markup).scrub!(:whitewash)
|
|
44
46
|
# => "ohai! <div>div with attributes</div>"
|
|
45
47
|
#
|
|
46
48
|
# One use case for this scrubber is to clean up HTML that was
|
|
@@ -55,30 +57,71 @@ module Loofah
|
|
|
55
57
|
# +:nofollow+ adds a rel="nofollow" attribute to all links
|
|
56
58
|
#
|
|
57
59
|
# link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
|
|
58
|
-
# Loofah.
|
|
60
|
+
# Loofah.html5_fragment(link_farmers_markup).scrub!(:nofollow)
|
|
59
61
|
# => "ohai! <a href='http://www.myswarmysite.com/' rel="nofollow">I like your blog post</a>"
|
|
60
62
|
#
|
|
61
63
|
#
|
|
64
|
+
# === Loofah::Scrubbers::TargetBlank / scrub!(:targetblank)
|
|
65
|
+
#
|
|
66
|
+
# +:targetblank+ adds a target="_blank" attribute to all links
|
|
67
|
+
#
|
|
68
|
+
# link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
|
|
69
|
+
# Loofah.html5_fragment(link_farmers_markup).scrub!(:targetblank)
|
|
70
|
+
# => "ohai! <a href='http://www.myswarmysite.com/' target="_blank">I like your blog post</a>"
|
|
71
|
+
#
|
|
72
|
+
#
|
|
73
|
+
# === Loofah::Scrubbers::NoOpener / scrub!(:noopener)
|
|
74
|
+
#
|
|
75
|
+
# +:noopener+ adds a rel="noopener" attribute to all links
|
|
76
|
+
#
|
|
77
|
+
# link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
|
|
78
|
+
# Loofah.html5_fragment(link_farmers_markup).scrub!(:noopener)
|
|
79
|
+
# => "ohai! <a href='http://www.myswarmysite.com/' rel="noopener">I like your blog post</a>"
|
|
80
|
+
#
|
|
81
|
+
# === Loofah::Scrubbers::NoReferrer / scrub!(:noreferrer)
|
|
82
|
+
#
|
|
83
|
+
# +:noreferrer+ adds a rel="noreferrer" attribute to all links
|
|
84
|
+
#
|
|
85
|
+
# link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
|
|
86
|
+
# Loofah.html5_fragment(link_farmers_markup).scrub!(:noreferrer)
|
|
87
|
+
# => "ohai! <a href='http://www.myswarmysite.com/' rel="noreferrer">I like your blog post</a>"
|
|
88
|
+
#
|
|
89
|
+
#
|
|
90
|
+
# === Loofah::Scrubbers::Unprintable / scrub!(:unprintable)
|
|
91
|
+
#
|
|
92
|
+
# +:unprintable+ removes unprintable Unicode characters.
|
|
93
|
+
#
|
|
94
|
+
# markup = "<p>Some text with an unprintable character at the end\u2028</p>"
|
|
95
|
+
# Loofah.html5_fragment(markup).scrub!(:unprintable)
|
|
96
|
+
# => "<p>Some text with an unprintable character at the end</p>"
|
|
97
|
+
#
|
|
98
|
+
# You may not be able to see the unprintable character in the above example, but there is a
|
|
99
|
+
# U+2028 character right before the closing </p> tag. These characters can cause issues if
|
|
100
|
+
# the content is ever parsed by JavaScript - more information here:
|
|
101
|
+
#
|
|
102
|
+
# http://timelessrepo.com/json-isnt-a-javascript-subset
|
|
103
|
+
#
|
|
62
104
|
module Scrubbers
|
|
63
|
-
|
|
64
105
|
#
|
|
65
106
|
# === scrub!(:strip)
|
|
66
107
|
#
|
|
67
108
|
# +:strip+ removes unknown/unsafe tags, but leaves behind the pristine contents:
|
|
68
109
|
#
|
|
69
110
|
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
|
70
|
-
# Loofah.
|
|
111
|
+
# Loofah.html5_fragment(unsafe_html).scrub!(:strip)
|
|
71
112
|
# => "ohai! <div>div is safe</div> but foo is <b>not</b>"
|
|
72
113
|
#
|
|
73
114
|
class Strip < Scrubber
|
|
74
|
-
def initialize
|
|
115
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
|
75
116
|
@direction = :bottom_up
|
|
76
117
|
end
|
|
77
118
|
|
|
78
119
|
def scrub(node)
|
|
79
120
|
return CONTINUE if html5lib_sanitize(node) == CONTINUE
|
|
80
|
-
|
|
121
|
+
|
|
122
|
+
node.before(node.children)
|
|
81
123
|
node.remove
|
|
124
|
+
STOP
|
|
82
125
|
end
|
|
83
126
|
end
|
|
84
127
|
|
|
@@ -88,18 +131,19 @@ module Loofah
|
|
|
88
131
|
# +:prune+ removes unknown/unsafe tags and their contents (including their subtrees):
|
|
89
132
|
#
|
|
90
133
|
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
|
91
|
-
# Loofah.
|
|
134
|
+
# Loofah.html5_fragment(unsafe_html).scrub!(:prune)
|
|
92
135
|
# => "ohai! <div>div is safe</div> "
|
|
93
136
|
#
|
|
94
137
|
class Prune < Scrubber
|
|
95
|
-
def initialize
|
|
138
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
|
96
139
|
@direction = :top_down
|
|
97
140
|
end
|
|
98
141
|
|
|
99
142
|
def scrub(node)
|
|
100
143
|
return CONTINUE if html5lib_sanitize(node) == CONTINUE
|
|
144
|
+
|
|
101
145
|
node.remove
|
|
102
|
-
|
|
146
|
+
STOP
|
|
103
147
|
end
|
|
104
148
|
end
|
|
105
149
|
|
|
@@ -109,20 +153,20 @@ module Loofah
|
|
|
109
153
|
# +:escape+ performs HTML entity escaping on the unknown/unsafe tags:
|
|
110
154
|
#
|
|
111
155
|
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
|
112
|
-
# Loofah.
|
|
156
|
+
# Loofah.html5_fragment(unsafe_html).scrub!(:escape)
|
|
113
157
|
# => "ohai! <div>div is safe</div> &lt;foo&gt;but foo is &lt;b&gt;not&lt;/b&gt;&lt;/foo&gt;"
|
|
114
158
|
#
|
|
115
159
|
class Escape < Scrubber
|
|
116
|
-
def initialize
|
|
160
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
|
117
161
|
@direction = :top_down
|
|
118
162
|
end
|
|
119
163
|
|
|
120
164
|
def scrub(node)
|
|
121
165
|
return CONTINUE if html5lib_sanitize(node) == CONTINUE
|
|
122
|
-
|
|
123
|
-
node.add_next_sibling
|
|
166
|
+
|
|
167
|
+
node.add_next_sibling(Nokogiri::XML::Text.new(node.to_s, node.document))
|
|
124
168
|
node.remove
|
|
125
|
-
|
|
169
|
+
STOP
|
|
126
170
|
end
|
|
127
171
|
end
|
|
128
172
|
|
|
@@ -135,7 +179,7 @@ module Loofah
|
|
|
135
179
|
# layer of paint on top of the HTML input to make it look nice.
|
|
136
180
|
#
|
|
137
181
|
# messy_markup = "ohai! <div id='foo' class='bar' style='margin: 10px'>div with attributes</div>"
|
|
138
|
-
# Loofah.
|
|
182
|
+
# Loofah.html5_fragment(messy_markup).scrub!(:whitewash)
|
|
139
183
|
# => "ohai! <div>div with attributes</div>"
|
|
140
184
|
#
|
|
141
185
|
# One use case for this scrubber is to clean up HTML that was
|
|
@@ -145,14 +189,14 @@ module Loofah
|
|
|
145
189
|
# Certainly not me.
|
|
146
190
|
#
|
|
147
191
|
class Whitewash < Scrubber
|
|
148
|
-
def initialize
|
|
192
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
|
149
193
|
@direction = :top_down
|
|
150
194
|
end
|
|
151
195
|
|
|
152
196
|
def scrub(node)
|
|
153
197
|
case node.type
|
|
154
198
|
when Nokogiri::XML::Node::ELEMENT_NODE
|
|
155
|
-
if HTML5::
|
|
199
|
+
if HTML5::Scrub.allowed_element?(node.name)
|
|
156
200
|
node.attributes.each { |attr| node.remove_attribute(attr.first) }
|
|
157
201
|
return CONTINUE if node.namespaces.empty?
|
|
158
202
|
end
|
|
@@ -170,30 +214,217 @@ module Loofah
|
|
|
170
214
|
# +:nofollow+ adds a rel="nofollow" attribute to all links
|
|
171
215
|
#
|
|
172
216
|
# link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
|
|
173
|
-
# Loofah.
|
|
217
|
+
# Loofah.html5_fragment(link_farmers_markup).scrub!(:nofollow)
|
|
174
218
|
# => "ohai! <a href='http://www.myswarmysite.com/' rel="nofollow">I like your blog post</a>"
|
|
175
219
|
#
|
|
176
220
|
class NoFollow < Scrubber
|
|
177
|
-
def initialize
|
|
221
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
|
222
|
+
@direction = :top_down
|
|
223
|
+
end
|
|
224
|
+
|
|
225
|
+
def scrub(node)
|
|
226
|
+
return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
|
|
227
|
+
|
|
228
|
+
append_attribute(node, "rel", "nofollow")
|
|
229
|
+
STOP
|
|
230
|
+
end
|
|
231
|
+
end
|
|
232
|
+
|
|
233
|
+
#
|
|
234
|
+
# === scrub!(:targetblank)
|
|
235
|
+
#
|
|
236
|
+
# +:targetblank+ adds a target="_blank" attribute to all links.
|
|
237
|
+
# If there is a target already set, replaces it with target="_blank".
|
|
238
|
+
#
|
|
239
|
+
# link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
|
|
240
|
+
# Loofah.html5_fragment(link_farmers_markup).scrub!(:targetblank)
|
|
241
|
+
# => "ohai! <a href='http://www.myswarmysite.com/' target="_blank">I like your blog post</a>"
|
|
242
|
+
#
|
|
243
|
+
# On modern browsers, setting target="_blank" on anchor elements implicitly provides the same
|
|
244
|
+
# behavior as setting rel="noopener".
|
|
245
|
+
#
|
|
246
|
+
class TargetBlank < Scrubber
|
|
247
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
|
248
|
+
@direction = :top_down
|
|
249
|
+
end
|
|
250
|
+
|
|
251
|
+
def scrub(node)
|
|
252
|
+
return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
|
|
253
|
+
|
|
254
|
+
href = node["href"]
|
|
255
|
+
|
|
256
|
+
node.set_attribute("target", "_blank") if href && href[0] != "#"
|
|
257
|
+
|
|
258
|
+
STOP
|
|
259
|
+
end
|
|
260
|
+
end
|
|
261
|
+
|
|
262
|
+
#
|
|
263
|
+
# === scrub!(:noopener)
|
|
264
|
+
#
|
|
265
|
+
# +:noopener+ adds a rel="noopener" attribute to all links
|
|
266
|
+
#
|
|
267
|
+
# link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
|
|
268
|
+
# Loofah.html5_fragment(link_farmers_markup).scrub!(:noopener)
|
|
269
|
+
# => "ohai! <a href='http://www.myswarmysite.com/' rel="noopener">I like your blog post</a>"
|
|
270
|
+
#
|
|
271
|
+
class NoOpener < Scrubber
|
|
272
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
|
273
|
+
@direction = :top_down
|
|
274
|
+
end
|
|
275
|
+
|
|
276
|
+
def scrub(node)
|
|
277
|
+
return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
|
|
278
|
+
|
|
279
|
+
append_attribute(node, "rel", "noopener")
|
|
280
|
+
STOP
|
|
281
|
+
end
|
|
282
|
+
end
|
|
283
|
+
|
|
284
|
+
#
|
|
285
|
+
# === scrub!(:noreferrer)
|
|
286
|
+
#
|
|
287
|
+
# +:noreferrer+ adds a rel="noreferrer" attribute to all links
|
|
288
|
+
#
|
|
289
|
+
# link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
|
|
290
|
+
# Loofah.html5_fragment(link_farmers_markup).scrub!(:noreferrer)
|
|
291
|
+
# => "ohai! <a href='http://www.myswarmysite.com/' rel="noreferrer">I like your blog post</a>"
|
|
292
|
+
#
|
|
293
|
+
class NoReferrer < Scrubber
|
|
294
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
|
295
|
+
@direction = :top_down
|
|
296
|
+
end
|
|
297
|
+
|
|
298
|
+
def scrub(node)
|
|
299
|
+
return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
|
|
300
|
+
|
|
301
|
+
append_attribute(node, "rel", "noreferrer")
|
|
302
|
+
STOP
|
|
303
|
+
end
|
|
304
|
+
end
|
|
305
|
+
|
|
306
|
+
# This class probably isn't useful publicly, but is used for #to_text's current implementation
|
|
307
|
+
class NewlineBlockElements < Scrubber # :nodoc:
|
|
308
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
|
309
|
+
@direction = :bottom_up
|
|
310
|
+
end
|
|
311
|
+
|
|
312
|
+
def scrub(node)
|
|
313
|
+
return CONTINUE unless Loofah::Elements::LINEBREAKERS.include?(node.name)
|
|
314
|
+
|
|
315
|
+
replacement = if Loofah::Elements::INLINE_LINE_BREAK.include?(node.name)
|
|
316
|
+
"\n"
|
|
317
|
+
else
|
|
318
|
+
"\n#{node.content}\n"
|
|
319
|
+
end
|
|
320
|
+
node.add_next_sibling(Nokogiri::XML::Text.new(replacement, node.document))
|
|
321
|
+
node.remove
|
|
322
|
+
end
|
|
323
|
+
end
|
|
324
|
+
|
|
325
|
+
#
|
|
326
|
+
# === scrub!(:unprintable)
|
|
327
|
+
#
|
|
328
|
+
# +:unprintable+ removes unprintable Unicode characters.
|
|
329
|
+
#
|
|
330
|
+
# markup = "<p>Some text with an unprintable character at the end\u2028</p>"
|
|
331
|
+
# Loofah.html5_fragment(markup).scrub!(:unprintable)
|
|
332
|
+
# => "<p>Some text with an unprintable character at the end</p>"
|
|
333
|
+
#
|
|
334
|
+
# You may not be able to see the unprintable character in the above example, but there is a
|
|
335
|
+
# U+2028 character right before the closing </p> tag. These characters can cause issues if
|
|
336
|
+
# the content is ever parsed by JavaScript - more information here:
|
|
337
|
+
#
|
|
338
|
+
# http://timelessrepo.com/json-isnt-a-javascript-subset
|
|
339
|
+
#
|
|
340
|
+
class Unprintable < Scrubber
|
|
341
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
|
178
342
|
@direction = :top_down
|
|
179
343
|
end
|
|
180
344
|
|
|
181
345
|
def scrub(node)
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
346
|
+
if node.type == Nokogiri::XML::Node::TEXT_NODE || node.type == Nokogiri::XML::Node::CDATA_SECTION_NODE
|
|
347
|
+
node.content = node.content.gsub(/\u2028|\u2029/, "")
|
|
348
|
+
end
|
|
349
|
+
CONTINUE
|
|
185
350
|
end
|
|
186
351
|
end
|
|
187
352
|
|
|
353
|
+
#
|
|
354
|
+
# === scrub!(:double_breakpoint)
|
|
355
|
+
#
|
|
356
|
+
# +:double_breakpoint+ replaces double-break tags with closing/opening paragraph tags.
|
|
357
|
+
#
|
|
358
|
+
# markup = "<p>Some text here in a logical paragraph.<br><br>Some more text, apparently a second paragraph.</p>"
|
|
359
|
+
# Loofah.html5_fragment(markup).scrub!(:double_breakpoint)
|
|
360
|
+
# => "<p>Some text here in a logical paragraph.</p><p>Some more text, apparently a second paragraph.</p>"
|
|
361
|
+
#
|
|
362
|
+
class DoubleBreakpoint < Scrubber
|
|
363
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
|
364
|
+
@direction = :top_down
|
|
365
|
+
end
|
|
366
|
+
|
|
367
|
+
def scrub(node)
|
|
368
|
+
return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "p")
|
|
369
|
+
|
|
370
|
+
paragraph_with_break_point_nodes = node.xpath("//p[br[following-sibling::br]]")
|
|
371
|
+
|
|
372
|
+
paragraph_with_break_point_nodes.each do |paragraph_node|
|
|
373
|
+
new_paragraph = paragraph_node.add_previous_sibling("<p>").first
|
|
374
|
+
|
|
375
|
+
paragraph_node.children.each do |child|
|
|
376
|
+
remove_blank_text_nodes(child)
|
|
377
|
+
end
|
|
378
|
+
|
|
379
|
+
paragraph_node.children.each do |child|
|
|
380
|
+
# already unlinked
|
|
381
|
+
next if child.parent.nil?
|
|
382
|
+
|
|
383
|
+
if child.name == "br" && child.next_sibling.name == "br"
|
|
384
|
+
new_paragraph = paragraph_node.add_previous_sibling("<p>").first
|
|
385
|
+
child.next_sibling.unlink
|
|
386
|
+
child.unlink
|
|
387
|
+
else
|
|
388
|
+
child.parent = new_paragraph
|
|
389
|
+
end
|
|
390
|
+
end
|
|
391
|
+
|
|
392
|
+
paragraph_node.unlink
|
|
393
|
+
end
|
|
394
|
+
|
|
395
|
+
CONTINUE
|
|
396
|
+
end
|
|
397
|
+
|
|
398
|
+
private
|
|
399
|
+
|
|
400
|
+
def remove_blank_text_nodes(node)
|
|
401
|
+
node.unlink if node.text? && node.blank?
|
|
402
|
+
end
|
|
403
|
+
end
|
|
188
404
|
#
|
|
189
405
|
# A hash that maps a symbol (like +:prune+) to the appropriate Scrubber (Loofah::Scrubbers::Prune).
|
|
190
406
|
#
|
|
191
407
|
MAP = {
|
|
192
|
-
:
|
|
193
|
-
:
|
|
194
|
-
:
|
|
195
|
-
:
|
|
196
|
-
:
|
|
408
|
+
escape: Escape,
|
|
409
|
+
prune: Prune,
|
|
410
|
+
whitewash: Whitewash,
|
|
411
|
+
strip: Strip,
|
|
412
|
+
nofollow: NoFollow,
|
|
413
|
+
noopener: NoOpener,
|
|
414
|
+
noreferrer: NoReferrer,
|
|
415
|
+
targetblank: TargetBlank,
|
|
416
|
+
newline_block_elements: NewlineBlockElements,
|
|
417
|
+
unprintable: Unprintable,
|
|
418
|
+
double_breakpoint: DoubleBreakpoint,
|
|
197
419
|
}
|
|
420
|
+
|
|
421
|
+
class << self
|
|
422
|
+
#
|
|
423
|
+
# Returns an array of symbols representing the built-in scrubbers
|
|
424
|
+
#
|
|
425
|
+
def scrubber_symbols
|
|
426
|
+
MAP.keys
|
|
427
|
+
end
|
|
428
|
+
end
|
|
198
429
|
end
|
|
199
430
|
end
|
data/lib/loofah/xml/document.rb
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module Loofah
|
|
2
4
|
module XML # :nodoc:
|
|
3
5
|
#
|
|
@@ -6,16 +8,11 @@ module Loofah
|
|
|
6
8
|
# See Loofah::ScrubBehavior for additional methods.
|
|
7
9
|
#
|
|
8
10
|
class DocumentFragment < Nokogiri::XML::DocumentFragment
|
|
9
|
-
include Loofah::ScrubBehavior::Node
|
|
10
|
-
|
|
11
11
|
class << self
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
#
|
|
17
|
-
def parse tags
|
|
18
|
-
self.new(Loofah::XML::Document.new, tags)
|
|
12
|
+
def parse(tags)
|
|
13
|
+
doc = Loofah::XML::Document.new
|
|
14
|
+
doc.encoding = tags.encoding.name if tags.respond_to?(:encoding)
|
|
15
|
+
new(doc, tags)
|
|
19
16
|
end
|
|
20
17
|
end
|
|
21
18
|
end
|
data/lib/loofah.rb
CHANGED
|
@@ -1,66 +1,155 @@
|
|
|
1
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require
|
|
3
|
+
require "nokogiri"
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
5
|
+
module Loofah
|
|
6
|
+
class << self
|
|
7
|
+
def html5_support?
|
|
8
|
+
# Note that Loofah can only support HTML5 in Nokogiri >= 1.14.0 because it requires the
|
|
9
|
+
# subclassing fix from https://github.com/sparklemotion/nokogiri/pull/2534
|
|
10
|
+
return @html5_support if defined? @html5_support
|
|
11
|
+
|
|
12
|
+
@html5_support =
|
|
13
|
+
Gem::Version.new(Nokogiri::VERSION) > Gem::Version.new("1.14.0") &&
|
|
14
|
+
Nokogiri.uses_gumbo?
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
require_relative "loofah/version"
|
|
20
|
+
require_relative "loofah/metahelpers"
|
|
21
|
+
require_relative "loofah/elements"
|
|
7
22
|
|
|
8
|
-
|
|
9
|
-
|
|
23
|
+
require_relative "loofah/html5/safelist"
|
|
24
|
+
require_relative "loofah/html5/libxml2_workarounds"
|
|
25
|
+
require_relative "loofah/html5/scrub"
|
|
10
26
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
require 'loofah/xml/document_fragment'
|
|
14
|
-
require 'loofah/html/document'
|
|
15
|
-
require 'loofah/html/document_fragment'
|
|
27
|
+
require_relative "loofah/scrubber"
|
|
28
|
+
require_relative "loofah/scrubbers"
|
|
16
29
|
|
|
17
|
-
|
|
30
|
+
require_relative "loofah/concerns"
|
|
31
|
+
require_relative "loofah/xml/document"
|
|
32
|
+
require_relative "loofah/xml/document_fragment"
|
|
33
|
+
require_relative "loofah/html4/document"
|
|
34
|
+
require_relative "loofah/html4/document_fragment"
|
|
35
|
+
|
|
36
|
+
if Loofah.html5_support?
|
|
37
|
+
require_relative "loofah/html5/document"
|
|
38
|
+
require_relative "loofah/html5/document_fragment"
|
|
39
|
+
end
|
|
18
40
|
|
|
19
41
|
# == Strings and IO Objects as Input
|
|
20
42
|
#
|
|
21
|
-
#
|
|
22
|
-
#
|
|
23
|
-
#
|
|
24
|
-
#
|
|
25
|
-
#
|
|
43
|
+
# The following methods accept any IO object in addition to accepting a string:
|
|
44
|
+
#
|
|
45
|
+
# - Loofah.html4_document
|
|
46
|
+
# - Loofah.html4_fragment
|
|
47
|
+
# - Loofah.scrub_html4_document
|
|
48
|
+
# - Loofah.scrub_html4_fragment
|
|
49
|
+
#
|
|
50
|
+
# - Loofah.html5_document
|
|
51
|
+
# - Loofah.html5_fragment
|
|
52
|
+
# - Loofah.scrub_html5_document
|
|
53
|
+
# - Loofah.scrub_html5_fragment
|
|
54
|
+
#
|
|
55
|
+
# - Loofah.xml_document
|
|
56
|
+
# - Loofah.xml_fragment
|
|
57
|
+
# - Loofah.scrub_xml_document
|
|
58
|
+
# - Loofah.scrub_xml_fragment
|
|
59
|
+
#
|
|
60
|
+
# - Loofah.document
|
|
61
|
+
# - Loofah.fragment
|
|
62
|
+
# - Loofah.scrub_document
|
|
63
|
+
# - Loofah.scrub_fragment
|
|
64
|
+
#
|
|
65
|
+
# That IO object could be a file, or a socket, or a StringIO, or anything that responds to +read+
|
|
66
|
+
# and +close+.
|
|
26
67
|
#
|
|
27
68
|
module Loofah
|
|
28
|
-
#
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
# The minimum required version of Nokogiri
|
|
32
|
-
REQUIRED_NOKOGIRI_VERSION = '1.3.3'
|
|
69
|
+
# Alias for Loofah::HTML4
|
|
70
|
+
HTML = HTML4
|
|
33
71
|
|
|
34
72
|
class << self
|
|
35
|
-
# Shortcut for Loofah::
|
|
36
|
-
#
|
|
37
|
-
|
|
38
|
-
|
|
73
|
+
# Shortcut for Loofah::HTML4::Document.parse(*args, &block)
|
|
74
|
+
#
|
|
75
|
+
# This method accepts the same parameters as Nokogiri::HTML4::Document.parse
|
|
76
|
+
def html4_document(*args, &block)
|
|
77
|
+
Loofah::HTML4::Document.parse(*args, &block)
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
# Shortcut for Loofah::HTML4::DocumentFragment.parse(*args, &block)
|
|
81
|
+
#
|
|
82
|
+
# This method accepts the same parameters as Nokogiri::HTML4::DocumentFragment.parse
|
|
83
|
+
def html4_fragment(*args, &block)
|
|
84
|
+
Loofah::HTML4::DocumentFragment.parse(*args, &block)
|
|
39
85
|
end
|
|
40
86
|
|
|
41
|
-
# Shortcut for Loofah::
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
Loofah::HTML::DocumentFragment.parse(*args, &block)
|
|
87
|
+
# Shortcut for Loofah::HTML4::Document.parse(string_or_io).scrub!(method)
|
|
88
|
+
def scrub_html4_document(string_or_io, method)
|
|
89
|
+
Loofah::HTML4::Document.parse(string_or_io).scrub!(method)
|
|
45
90
|
end
|
|
46
91
|
|
|
47
|
-
# Shortcut for Loofah.
|
|
48
|
-
def
|
|
49
|
-
Loofah.
|
|
92
|
+
# Shortcut for Loofah::HTML4::DocumentFragment.parse(string_or_io).scrub!(method)
|
|
93
|
+
def scrub_html4_fragment(string_or_io, method)
|
|
94
|
+
Loofah::HTML4::DocumentFragment.parse(string_or_io).scrub!(method)
|
|
50
95
|
end
|
|
51
96
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
97
|
+
if Loofah.html5_support?
|
|
98
|
+
# Shortcut for Loofah::HTML5::Document.parse(*args, &block)
|
|
99
|
+
#
|
|
100
|
+
# This method accepts the same parameters as Nokogiri::HTML5::Document.parse
|
|
101
|
+
def html5_document(*args, &block)
|
|
102
|
+
Loofah::HTML5::Document.parse(*args, &block)
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
# Shortcut for Loofah::HTML5::DocumentFragment.parse(*args, &block)
|
|
106
|
+
#
|
|
107
|
+
# This method accepts the same parameters as Nokogiri::HTML5::DocumentFragment.parse
|
|
108
|
+
def html5_fragment(*args, &block)
|
|
109
|
+
Loofah::HTML5::DocumentFragment.parse(*args, &block)
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
# Shortcut for Loofah::HTML5::Document.parse(string_or_io).scrub!(method)
|
|
113
|
+
def scrub_html5_document(string_or_io, method)
|
|
114
|
+
Loofah::HTML5::Document.parse(string_or_io).scrub!(method)
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
# Shortcut for Loofah::HTML5::DocumentFragment.parse(string_or_io).scrub!(method)
|
|
118
|
+
def scrub_html5_fragment(string_or_io, method)
|
|
119
|
+
Loofah::HTML5::DocumentFragment.parse(string_or_io).scrub!(method)
|
|
120
|
+
end
|
|
121
|
+
else
|
|
122
|
+
def html5_document(*args, &block)
|
|
123
|
+
raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
def html5_fragment(*args, &block)
|
|
127
|
+
raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
def scrub_html5_document(string_or_io, method)
|
|
131
|
+
raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
def scrub_html5_fragment(string_or_io, method)
|
|
135
|
+
raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
|
|
136
|
+
end
|
|
55
137
|
end
|
|
56
138
|
|
|
57
|
-
|
|
139
|
+
alias_method :document, :html4_document
|
|
140
|
+
alias_method :fragment, :html4_fragment
|
|
141
|
+
alias_method :scrub_document, :scrub_html4_document
|
|
142
|
+
alias_method :scrub_fragment, :scrub_html4_fragment
|
|
143
|
+
|
|
144
|
+
# Shortcut for Loofah::XML::Document.parse(*args, &block)
|
|
145
|
+
#
|
|
58
146
|
# This method accepts the same parameters as Nokogiri::XML::Document.parse
|
|
59
147
|
def xml_document(*args, &block)
|
|
60
148
|
Loofah::XML::Document.parse(*args, &block)
|
|
61
149
|
end
|
|
62
150
|
|
|
63
|
-
# Shortcut for Loofah::XML::DocumentFragment.parse
|
|
151
|
+
# Shortcut for Loofah::XML::DocumentFragment.parse(*args, &block)
|
|
152
|
+
#
|
|
64
153
|
# This method accepts the same parameters as Nokogiri::XML::DocumentFragment.parse
|
|
65
154
|
def xml_fragment(*args, &block)
|
|
66
155
|
Loofah::XML::DocumentFragment.parse(*args, &block)
|
|
@@ -76,19 +165,9 @@ module Loofah
|
|
|
76
165
|
Loofah.xml_document(string_or_io).scrub!(method)
|
|
77
166
|
end
|
|
78
167
|
|
|
168
|
+
# A helper to remove extraneous whitespace from text-ified HTML
|
|
169
|
+
def remove_extraneous_whitespace(string)
|
|
170
|
+
string.gsub(/\n\s*\n\s*\n/, "\n\n")
|
|
171
|
+
end
|
|
79
172
|
end
|
|
80
173
|
end
|
|
81
|
-
|
|
82
|
-
if Nokogiri::VERSION < Loofah::REQUIRED_NOKOGIRI_VERSION
|
|
83
|
-
raise RuntimeError, "Loofah requires Nokogiri #{Loofah::REQUIRED_NOKOGIRI_VERSION} or later (currently #{Nokogiri::VERSION})"
|
|
84
|
-
end
|
|
85
|
-
|
|
86
|
-
if defined? Rails.configuration and Rails.configuration.frameworks.include?([:active_record]) # rails 2.1 and later
|
|
87
|
-
Rails.configuration.after_initialize do
|
|
88
|
-
require 'loofah/active_record'
|
|
89
|
-
require 'loofah/xss_foliate'
|
|
90
|
-
end
|
|
91
|
-
elsif defined? ActiveRecord::Base # rails 2.0
|
|
92
|
-
require 'loofah/active_record'
|
|
93
|
-
require 'loofah/xss_foliate'
|
|
94
|
-
end
|