loofah 2.2.3 → 2.21.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +269 -31
- data/README.md +109 -124
- data/lib/loofah/concerns.rb +207 -0
- data/lib/loofah/elements.rb +85 -79
- data/lib/loofah/helpers.rb +37 -20
- data/lib/loofah/{html → html4}/document.rb +6 -7
- data/lib/loofah/html4/document_fragment.rb +15 -0
- data/lib/loofah/html5/document.rb +17 -0
- data/lib/loofah/html5/document_fragment.rb +15 -0
- data/lib/loofah/html5/libxml2_workarounds.rb +10 -8
- data/lib/loofah/html5/safelist.rb +1055 -0
- data/lib/loofah/html5/scrub.rb +153 -58
- data/lib/loofah/metahelpers.rb +11 -6
- data/lib/loofah/scrubber.rb +22 -15
- data/lib/loofah/scrubbers.rb +66 -55
- data/lib/loofah/version.rb +6 -0
- data/lib/loofah/xml/document.rb +2 -0
- data/lib/loofah/xml/document_fragment.rb +4 -7
- data/lib/loofah.rb +131 -38
- metadata +28 -216
- data/.gemtest +0 -0
- data/Gemfile +0 -22
- data/Manifest.txt +0 -40
- data/Rakefile +0 -79
- data/benchmark/benchmark.rb +0 -149
- data/benchmark/fragment.html +0 -96
- data/benchmark/helper.rb +0 -73
- data/benchmark/www.slashdot.com.html +0 -2560
- data/lib/loofah/html/document_fragment.rb +0 -40
- data/lib/loofah/html5/whitelist.rb +0 -186
- data/lib/loofah/instance_methods.rb +0 -127
- data/test/assets/msword.html +0 -63
- data/test/assets/testdata_sanitizer_tests1.dat +0 -502
- data/test/helper.rb +0 -18
- data/test/html5/test_sanitizer.rb +0 -382
- data/test/integration/test_ad_hoc.rb +0 -204
- data/test/integration/test_helpers.rb +0 -43
- data/test/integration/test_html.rb +0 -72
- data/test/integration/test_scrubbers.rb +0 -400
- data/test/integration/test_xml.rb +0 -55
- data/test/unit/test_api.rb +0 -142
- data/test/unit/test_encoding.rb +0 -20
- data/test/unit/test_helpers.rb +0 -62
- data/test/unit/test_scrubber.rb +0 -229
- data/test/unit/test_scrubbers.rb +0 -14
data/lib/loofah/scrubbers.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Loofah
|
2
4
|
#
|
3
5
|
# Loofah provides some built-in scrubbers for sanitizing with
|
4
|
-
# HTML5lib's
|
6
|
+
# HTML5lib's safelist and for accomplishing some common
|
5
7
|
# transformation tasks.
|
6
8
|
#
|
7
9
|
#
|
@@ -10,7 +12,7 @@ module Loofah
|
|
10
12
|
# +:strip+ removes unknown/unsafe tags, but leaves behind the pristine contents:
|
11
13
|
#
|
12
14
|
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
13
|
-
# Loofah.
|
15
|
+
# Loofah.html5_fragment(unsafe_html).scrub!(:strip)
|
14
16
|
# => "ohai! <div>div is safe</div> but foo is <b>not</b>"
|
15
17
|
#
|
16
18
|
#
|
@@ -19,7 +21,7 @@ module Loofah
|
|
19
21
|
# +:prune+ removes unknown/unsafe tags and their contents (including their subtrees):
|
20
22
|
#
|
21
23
|
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
22
|
-
# Loofah.
|
24
|
+
# Loofah.html5_fragment(unsafe_html).scrub!(:prune)
|
23
25
|
# => "ohai! <div>div is safe</div> "
|
24
26
|
#
|
25
27
|
#
|
@@ -28,7 +30,7 @@ module Loofah
|
|
28
30
|
# +:escape+ performs HTML entity escaping on the unknown/unsafe tags:
|
29
31
|
#
|
30
32
|
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
31
|
-
# Loofah.
|
33
|
+
# Loofah.html5_fragment(unsafe_html).scrub!(:escape)
|
32
34
|
# => "ohai! <div>div is safe</div> &lt;foo&gt;but foo is &lt;b&gt;not&lt;/b&gt;&lt;/foo&gt;"
|
33
35
|
#
|
34
36
|
#
|
@@ -40,7 +42,7 @@ module Loofah
|
|
40
42
|
# layer of paint on top of the HTML input to make it look nice.
|
41
43
|
#
|
42
44
|
# messy_markup = "ohai! <div id='foo' class='bar' style='margin: 10px'>div with attributes</div>"
|
43
|
-
# Loofah.
|
45
|
+
# Loofah.html5_fragment(messy_markup).scrub!(:whitewash)
|
44
46
|
# => "ohai! <div>div with attributes</div>"
|
45
47
|
#
|
46
48
|
# One use case for this scrubber is to clean up HTML that was
|
@@ -55,7 +57,7 @@ module Loofah
|
|
55
57
|
# +:nofollow+ adds a rel="nofollow" attribute to all links
|
56
58
|
#
|
57
59
|
# link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
|
58
|
-
# Loofah.
|
60
|
+
# Loofah.html5_fragment(link_farmers_markup).scrub!(:nofollow)
|
59
61
|
# => "ohai! <a href='http://www.myswarmysite.com/' rel="nofollow">I like your blog post</a>"
|
60
62
|
#
|
61
63
|
#
|
@@ -64,7 +66,7 @@ module Loofah
|
|
64
66
|
# +:noopener+ adds a rel="noopener" attribute to all links
|
65
67
|
#
|
66
68
|
# link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
|
67
|
-
# Loofah.
|
69
|
+
# Loofah.html5_fragment(link_farmers_markup).scrub!(:noopener)
|
68
70
|
# => "ohai! <a href='http://www.myswarmysite.com/' rel="noopener">I like your blog post</a>"
|
69
71
|
#
|
70
72
|
#
|
@@ -73,7 +75,7 @@ module Loofah
|
|
73
75
|
# +:unprintable+ removes unprintable Unicode characters.
|
74
76
|
#
|
75
77
|
# markup = "<p>Some text with an unprintable character at the end\u2028</p>"
|
76
|
-
# Loofah.
|
78
|
+
# Loofah.html5_fragment(markup).scrub!(:unprintable)
|
77
79
|
# => "<p>Some text with an unprintable character at the end</p>"
|
78
80
|
#
|
79
81
|
# You may not be able to see the unprintable character in the above example, but there is a
|
@@ -89,23 +91,20 @@ module Loofah
|
|
89
91
|
# +:strip+ removes unknown/unsafe tags, but leaves behind the pristine contents:
|
90
92
|
#
|
91
93
|
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
92
|
-
# Loofah.
|
94
|
+
# Loofah.html5_fragment(unsafe_html).scrub!(:strip)
|
93
95
|
# => "ohai! <div>div is safe</div> but foo is <b>not</b>"
|
94
96
|
#
|
95
97
|
class Strip < Scrubber
|
96
|
-
def initialize
|
98
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
97
99
|
@direction = :bottom_up
|
98
100
|
end
|
99
101
|
|
100
102
|
def scrub(node)
|
101
103
|
return CONTINUE if html5lib_sanitize(node) == CONTINUE
|
102
|
-
|
103
|
-
|
104
|
-
node.before Nokogiri::XML::Text.new(sanitized_text, node.document)
|
105
|
-
else
|
106
|
-
node.before node.children
|
107
|
-
end
|
104
|
+
|
105
|
+
node.before(node.children)
|
108
106
|
node.remove
|
107
|
+
STOP
|
109
108
|
end
|
110
109
|
end
|
111
110
|
|
@@ -115,18 +114,19 @@ module Loofah
|
|
115
114
|
# +:prune+ removes unknown/unsafe tags and their contents (including their subtrees):
|
116
115
|
#
|
117
116
|
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
118
|
-
# Loofah.
|
117
|
+
# Loofah.html5_fragment(unsafe_html).scrub!(:prune)
|
119
118
|
# => "ohai! <div>div is safe</div> "
|
120
119
|
#
|
121
120
|
class Prune < Scrubber
|
122
|
-
def initialize
|
121
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
123
122
|
@direction = :top_down
|
124
123
|
end
|
125
124
|
|
126
125
|
def scrub(node)
|
127
126
|
return CONTINUE if html5lib_sanitize(node) == CONTINUE
|
127
|
+
|
128
128
|
node.remove
|
129
|
-
|
129
|
+
STOP
|
130
130
|
end
|
131
131
|
end
|
132
132
|
|
@@ -136,19 +136,20 @@ module Loofah
|
|
136
136
|
# +:escape+ performs HTML entity escaping on the unknown/unsafe tags:
|
137
137
|
#
|
138
138
|
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
139
|
-
# Loofah.
|
139
|
+
# Loofah.html5_fragment(unsafe_html).scrub!(:escape)
|
140
140
|
# => "ohai! <div>div is safe</div> &lt;foo&gt;but foo is &lt;b&gt;not&lt;/b&gt;&lt;/foo&gt;"
|
141
141
|
#
|
142
142
|
class Escape < Scrubber
|
143
|
-
def initialize
|
143
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
144
144
|
@direction = :top_down
|
145
145
|
end
|
146
146
|
|
147
147
|
def scrub(node)
|
148
148
|
return CONTINUE if html5lib_sanitize(node) == CONTINUE
|
149
|
-
|
149
|
+
|
150
|
+
node.add_next_sibling(Nokogiri::XML::Text.new(node.to_s, node.document))
|
150
151
|
node.remove
|
151
|
-
|
152
|
+
STOP
|
152
153
|
end
|
153
154
|
end
|
154
155
|
|
@@ -161,7 +162,7 @@ module Loofah
|
|
161
162
|
# layer of paint on top of the HTML input to make it look nice.
|
162
163
|
#
|
163
164
|
# messy_markup = "ohai! <div id='foo' class='bar' style='margin: 10px'>div with attributes</div>"
|
164
|
-
# Loofah.
|
165
|
+
# Loofah.html5_fragment(messy_markup).scrub!(:whitewash)
|
165
166
|
# => "ohai! <div>div with attributes</div>"
|
166
167
|
#
|
167
168
|
# One use case for this scrubber is to clean up HTML that was
|
@@ -171,14 +172,14 @@ module Loofah
|
|
171
172
|
# Certainly not me.
|
172
173
|
#
|
173
174
|
class Whitewash < Scrubber
|
174
|
-
def initialize
|
175
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
175
176
|
@direction = :top_down
|
176
177
|
end
|
177
178
|
|
178
179
|
def scrub(node)
|
179
180
|
case node.type
|
180
181
|
when Nokogiri::XML::Node::ELEMENT_NODE
|
181
|
-
if HTML5::Scrub.allowed_element?
|
182
|
+
if HTML5::Scrub.allowed_element?(node.name)
|
182
183
|
node.attributes.each { |attr| node.remove_attribute(attr.first) }
|
183
184
|
return CONTINUE if node.namespaces.empty?
|
184
185
|
end
|
@@ -196,18 +197,19 @@ module Loofah
|
|
196
197
|
# +:nofollow+ adds a rel="nofollow" attribute to all links
|
197
198
|
#
|
198
199
|
# link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
|
199
|
-
# Loofah.
|
200
|
+
# Loofah.html5_fragment(link_farmers_markup).scrub!(:nofollow)
|
200
201
|
# => "ohai! <a href='http://www.myswarmysite.com/' rel="nofollow">I like your blog post</a>"
|
201
202
|
#
|
202
203
|
class NoFollow < Scrubber
|
203
|
-
def initialize
|
204
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
204
205
|
@direction = :top_down
|
205
206
|
end
|
206
207
|
|
207
208
|
def scrub(node)
|
208
|
-
return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name ==
|
209
|
-
|
210
|
-
|
209
|
+
return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
|
210
|
+
|
211
|
+
append_attribute(node, "rel", "nofollow")
|
212
|
+
STOP
|
211
213
|
end
|
212
214
|
end
|
213
215
|
|
@@ -217,30 +219,37 @@ module Loofah
|
|
217
219
|
# +:noopener+ adds a rel="noopener" attribute to all links
|
218
220
|
#
|
219
221
|
# link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
|
220
|
-
# Loofah.
|
222
|
+
# Loofah.html5_fragment(link_farmers_markup).scrub!(:noopener)
|
221
223
|
# => "ohai! <a href='http://www.myswarmysite.com/' rel="noopener">I like your blog post</a>"
|
222
224
|
#
|
223
225
|
class NoOpener < Scrubber
|
224
|
-
def initialize
|
226
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
225
227
|
@direction = :top_down
|
226
228
|
end
|
227
229
|
|
228
230
|
def scrub(node)
|
229
|
-
return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name ==
|
230
|
-
|
231
|
-
|
231
|
+
return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
|
232
|
+
|
233
|
+
append_attribute(node, "rel", "noopener")
|
234
|
+
STOP
|
232
235
|
end
|
233
236
|
end
|
234
237
|
|
235
238
|
# This class probably isn't useful publicly, but is used for #to_text's current implementation
|
236
239
|
class NewlineBlockElements < Scrubber # :nodoc:
|
237
|
-
def initialize
|
240
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
238
241
|
@direction = :bottom_up
|
239
242
|
end
|
240
243
|
|
241
244
|
def scrub(node)
|
242
|
-
return CONTINUE unless Loofah::Elements::
|
243
|
-
|
245
|
+
return CONTINUE unless Loofah::Elements::LINEBREAKERS.include?(node.name)
|
246
|
+
|
247
|
+
replacement = if Loofah::Elements::INLINE_LINE_BREAK.include?(node.name)
|
248
|
+
"\n"
|
249
|
+
else
|
250
|
+
"\n#{node.content}\n"
|
251
|
+
end
|
252
|
+
node.add_next_sibling(Nokogiri::XML::Text.new(replacement, node.document))
|
244
253
|
node.remove
|
245
254
|
end
|
246
255
|
end
|
@@ -251,7 +260,7 @@ module Loofah
|
|
251
260
|
# +:unprintable+ removes unprintable Unicode characters.
|
252
261
|
#
|
253
262
|
# markup = "<p>Some text with an unprintable character at the end\u2028</p>"
|
254
|
-
# Loofah.
|
263
|
+
# Loofah.html5_fragment(markup).scrub!(:unprintable)
|
255
264
|
# => "<p>Some text with an unprintable character at the end</p>"
|
256
265
|
#
|
257
266
|
# You may not be able to see the unprintable character in the above example, but there is a
|
@@ -261,13 +270,13 @@ module Loofah
|
|
261
270
|
# http://timelessrepo.com/json-isnt-a-javascript-subset
|
262
271
|
#
|
263
272
|
class Unprintable < Scrubber
|
264
|
-
def initialize
|
273
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
265
274
|
@direction = :top_down
|
266
275
|
end
|
267
276
|
|
268
277
|
def scrub(node)
|
269
278
|
if node.type == Nokogiri::XML::Node::TEXT_NODE || node.type == Nokogiri::XML::Node::CDATA_SECTION_NODE
|
270
|
-
node.content = node.content.gsub(/\u2028|\u2029/,
|
279
|
+
node.content = node.content.gsub(/\u2028|\u2029/, "")
|
271
280
|
end
|
272
281
|
CONTINUE
|
273
282
|
end
|
@@ -277,21 +286,23 @@ module Loofah
|
|
277
286
|
# A hash that maps a symbol (like +:prune+) to the appropriate Scrubber (Loofah::Scrubbers::Prune).
|
278
287
|
#
|
279
288
|
MAP = {
|
280
|
-
:
|
281
|
-
:
|
282
|
-
:
|
283
|
-
:
|
284
|
-
:
|
285
|
-
:
|
286
|
-
:
|
287
|
-
:
|
289
|
+
escape: Escape,
|
290
|
+
prune: Prune,
|
291
|
+
whitewash: Whitewash,
|
292
|
+
strip: Strip,
|
293
|
+
nofollow: NoFollow,
|
294
|
+
noopener: NoOpener,
|
295
|
+
newline_block_elements: NewlineBlockElements,
|
296
|
+
unprintable: Unprintable,
|
288
297
|
}
|
289
298
|
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
299
|
+
class << self
|
300
|
+
#
|
301
|
+
# Returns an array of symbols representing the built-in scrubbers
|
302
|
+
#
|
303
|
+
def scrubber_symbols
|
304
|
+
MAP.keys
|
305
|
+
end
|
295
306
|
end
|
296
307
|
end
|
297
308
|
end
|
data/lib/loofah/xml/document.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Loofah
|
2
4
|
module XML # :nodoc:
|
3
5
|
#
|
@@ -7,15 +9,10 @@ module Loofah
|
|
7
9
|
#
|
8
10
|
class DocumentFragment < Nokogiri::XML::DocumentFragment
|
9
11
|
class << self
|
10
|
-
|
11
|
-
# Overridden Nokogiri::XML::DocumentFragment
|
12
|
-
# constructor. Applications should use Loofah.fragment to
|
13
|
-
# parse a fragment.
|
14
|
-
#
|
15
|
-
def parse tags
|
12
|
+
def parse(tags)
|
16
13
|
doc = Loofah::XML::Document.new
|
17
14
|
doc.encoding = tags.encoding.name if tags.respond_to?(:encoding)
|
18
|
-
|
15
|
+
new(doc, tags)
|
19
16
|
end
|
20
17
|
end
|
21
18
|
end
|
data/lib/loofah.rb
CHANGED
@@ -1,65 +1,158 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
3
|
+
require "nokogiri"
|
4
4
|
|
5
|
-
|
6
|
-
|
5
|
+
module Loofah
|
6
|
+
class << self
|
7
|
+
def html5_support?
|
8
|
+
# Note that Loofah can only support HTML5 in Nokogiri >= 1.14.0 because it requires the
|
9
|
+
# subclassing fix from https://github.com/sparklemotion/nokogiri/pull/2534
|
10
|
+
unless @html5_support_set
|
11
|
+
@html5_support = (
|
12
|
+
Gem::Version.new(Nokogiri::VERSION) > Gem::Version.new("1.14.0") &&
|
13
|
+
Nokogiri.uses_gumbo?
|
14
|
+
)
|
15
|
+
@html5_support_set = true
|
16
|
+
end
|
17
|
+
@html5_support
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
require_relative "loofah/version"
|
23
|
+
require_relative "loofah/metahelpers"
|
24
|
+
require_relative "loofah/elements"
|
7
25
|
|
8
|
-
|
9
|
-
|
10
|
-
|
26
|
+
require_relative "loofah/html5/safelist"
|
27
|
+
require_relative "loofah/html5/libxml2_workarounds"
|
28
|
+
require_relative "loofah/html5/scrub"
|
11
29
|
|
12
|
-
|
13
|
-
|
30
|
+
require_relative "loofah/scrubber"
|
31
|
+
require_relative "loofah/scrubbers"
|
14
32
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
33
|
+
require_relative "loofah/concerns"
|
34
|
+
require_relative "loofah/xml/document"
|
35
|
+
require_relative "loofah/xml/document_fragment"
|
36
|
+
require_relative "loofah/html4/document"
|
37
|
+
require_relative "loofah/html4/document_fragment"
|
38
|
+
|
39
|
+
if Loofah.html5_support?
|
40
|
+
require_relative "loofah/html5/document"
|
41
|
+
require_relative "loofah/html5/document_fragment"
|
42
|
+
end
|
20
43
|
|
21
44
|
# == Strings and IO Objects as Input
|
22
45
|
#
|
23
|
-
#
|
24
|
-
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#
|
46
|
+
# The following methods accept any IO object in addition to accepting a string:
|
47
|
+
#
|
48
|
+
# - Loofah.html4_document
|
49
|
+
# - Loofah.html4_fragment
|
50
|
+
# - Loofah.scrub_html4_document
|
51
|
+
# - Loofah.scrub_html4_fragment
|
52
|
+
#
|
53
|
+
# - Loofah.html5_document
|
54
|
+
# - Loofah.html5_fragment
|
55
|
+
# - Loofah.scrub_html5_document
|
56
|
+
# - Loofah.scrub_html5_fragment
|
57
|
+
#
|
58
|
+
# - Loofah.xml_document
|
59
|
+
# - Loofah.xml_fragment
|
60
|
+
# - Loofah.scrub_xml_document
|
61
|
+
# - Loofah.scrub_xml_fragment
|
62
|
+
#
|
63
|
+
# - Loofah.document
|
64
|
+
# - Loofah.fragment
|
65
|
+
# - Loofah.scrub_document
|
66
|
+
# - Loofah.scrub_fragment
|
67
|
+
#
|
68
|
+
# That IO object could be a file, or a socket, or a StringIO, or anything that responds to +read+
|
69
|
+
# and +close+.
|
28
70
|
#
|
29
71
|
module Loofah
|
30
|
-
#
|
31
|
-
|
72
|
+
# Alias for Loofah::HTML4
|
73
|
+
HTML = HTML4
|
32
74
|
|
33
75
|
class << self
|
34
|
-
# Shortcut for Loofah::
|
35
|
-
#
|
36
|
-
|
37
|
-
|
76
|
+
# Shortcut for Loofah::HTML4::Document.parse(*args, &block)
|
77
|
+
#
|
78
|
+
# This method accepts the same parameters as Nokogiri::HTML4::Document.parse
|
79
|
+
def html4_document(*args, &block)
|
80
|
+
Loofah::HTML4::Document.parse(*args, &block)
|
38
81
|
end
|
39
82
|
|
40
|
-
# Shortcut for Loofah::
|
41
|
-
#
|
42
|
-
|
43
|
-
|
83
|
+
# Shortcut for Loofah::HTML4::DocumentFragment.parse(*args, &block)
|
84
|
+
#
|
85
|
+
# This method accepts the same parameters as Nokogiri::HTML4::DocumentFragment.parse
|
86
|
+
def html4_fragment(*args, &block)
|
87
|
+
Loofah::HTML4::DocumentFragment.parse(*args, &block)
|
44
88
|
end
|
45
89
|
|
46
|
-
# Shortcut for Loofah.
|
47
|
-
def
|
48
|
-
Loofah.
|
90
|
+
# Shortcut for Loofah::HTML4::Document.parse(string_or_io).scrub!(method)
|
91
|
+
def scrub_html4_document(string_or_io, method)
|
92
|
+
Loofah::HTML4::Document.parse(string_or_io).scrub!(method)
|
49
93
|
end
|
50
94
|
|
51
|
-
# Shortcut for Loofah.
|
52
|
-
def
|
53
|
-
Loofah.
|
95
|
+
# Shortcut for Loofah::HTML4::DocumentFragment.parse(string_or_io).scrub!(method)
|
96
|
+
def scrub_html4_fragment(string_or_io, method)
|
97
|
+
Loofah::HTML4::DocumentFragment.parse(string_or_io).scrub!(method)
|
54
98
|
end
|
55
99
|
|
56
|
-
|
100
|
+
if Loofah.html5_support?
|
101
|
+
# Shortcut for Loofah::HTML5::Document.parse(*args, &block)
|
102
|
+
#
|
103
|
+
# This method accepts the same parameters as Nokogiri::HTML5::Document.parse
|
104
|
+
def html5_document(*args, &block)
|
105
|
+
Loofah::HTML5::Document.parse(*args, &block)
|
106
|
+
end
|
107
|
+
|
108
|
+
# Shortcut for Loofah::HTML5::DocumentFragment.parse(*args, &block)
|
109
|
+
#
|
110
|
+
# This method accepts the same parameters as Nokogiri::HTML5::DocumentFragment.parse
|
111
|
+
def html5_fragment(*args, &block)
|
112
|
+
Loofah::HTML5::DocumentFragment.parse(*args, &block)
|
113
|
+
end
|
114
|
+
|
115
|
+
# Shortcut for Loofah::HTML5::Document.parse(string_or_io).scrub!(method)
|
116
|
+
def scrub_html5_document(string_or_io, method)
|
117
|
+
Loofah::HTML5::Document.parse(string_or_io).scrub!(method)
|
118
|
+
end
|
119
|
+
|
120
|
+
# Shortcut for Loofah::HTML5::DocumentFragment.parse(string_or_io).scrub!(method)
|
121
|
+
def scrub_html5_fragment(string_or_io, method)
|
122
|
+
Loofah::HTML5::DocumentFragment.parse(string_or_io).scrub!(method)
|
123
|
+
end
|
124
|
+
else
|
125
|
+
def html5_document(*args, &block)
|
126
|
+
raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
|
127
|
+
end
|
128
|
+
|
129
|
+
def html5_fragment(*args, &block)
|
130
|
+
raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
|
131
|
+
end
|
132
|
+
|
133
|
+
def scrub_html5_document(string_or_io, method)
|
134
|
+
raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
|
135
|
+
end
|
136
|
+
|
137
|
+
def scrub_html5_fragment(string_or_io, method)
|
138
|
+
raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
alias_method :document, :html4_document
|
143
|
+
alias_method :fragment, :html4_fragment
|
144
|
+
alias_method :scrub_document, :scrub_html4_document
|
145
|
+
alias_method :scrub_fragment, :scrub_html4_fragment
|
146
|
+
|
147
|
+
# Shortcut for Loofah::XML::Document.parse(*args, &block)
|
148
|
+
#
|
57
149
|
# This method accepts the same parameters as Nokogiri::XML::Document.parse
|
58
150
|
def xml_document(*args, &block)
|
59
151
|
Loofah::XML::Document.parse(*args, &block)
|
60
152
|
end
|
61
153
|
|
62
|
-
# Shortcut for Loofah::XML::DocumentFragment.parse
|
154
|
+
# Shortcut for Loofah::XML::DocumentFragment.parse(*args, &block)
|
155
|
+
#
|
63
156
|
# This method accepts the same parameters as Nokogiri::XML::DocumentFragment.parse
|
64
157
|
def xml_fragment(*args, &block)
|
65
158
|
Loofah::XML::DocumentFragment.parse(*args, &block)
|
@@ -77,7 +170,7 @@ module Loofah
|
|
77
170
|
|
78
171
|
# A helper to remove extraneous whitespace from text-ified HTML
|
79
172
|
def remove_extraneous_whitespace(string)
|
80
|
-
string.gsub(/\n\s*\n\s*\n/,"\n\n")
|
173
|
+
string.gsub(/\n\s*\n\s*\n/, "\n\n")
|
81
174
|
end
|
82
175
|
end
|
83
176
|
end
|