loofah 2.2.3 → 2.21.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +269 -31
- data/README.md +109 -124
- data/lib/loofah/concerns.rb +207 -0
- data/lib/loofah/elements.rb +85 -79
- data/lib/loofah/helpers.rb +37 -20
- data/lib/loofah/{html → html4}/document.rb +6 -7
- data/lib/loofah/html4/document_fragment.rb +15 -0
- data/lib/loofah/html5/document.rb +17 -0
- data/lib/loofah/html5/document_fragment.rb +15 -0
- data/lib/loofah/html5/libxml2_workarounds.rb +10 -8
- data/lib/loofah/html5/safelist.rb +1055 -0
- data/lib/loofah/html5/scrub.rb +153 -58
- data/lib/loofah/metahelpers.rb +11 -6
- data/lib/loofah/scrubber.rb +22 -15
- data/lib/loofah/scrubbers.rb +66 -55
- data/lib/loofah/version.rb +6 -0
- data/lib/loofah/xml/document.rb +2 -0
- data/lib/loofah/xml/document_fragment.rb +4 -7
- data/lib/loofah.rb +131 -38
- metadata +28 -216
- data/.gemtest +0 -0
- data/Gemfile +0 -22
- data/Manifest.txt +0 -40
- data/Rakefile +0 -79
- data/benchmark/benchmark.rb +0 -149
- data/benchmark/fragment.html +0 -96
- data/benchmark/helper.rb +0 -73
- data/benchmark/www.slashdot.com.html +0 -2560
- data/lib/loofah/html/document_fragment.rb +0 -40
- data/lib/loofah/html5/whitelist.rb +0 -186
- data/lib/loofah/instance_methods.rb +0 -127
- data/test/assets/msword.html +0 -63
- data/test/assets/testdata_sanitizer_tests1.dat +0 -502
- data/test/helper.rb +0 -18
- data/test/html5/test_sanitizer.rb +0 -382
- data/test/integration/test_ad_hoc.rb +0 -204
- data/test/integration/test_helpers.rb +0 -43
- data/test/integration/test_html.rb +0 -72
- data/test/integration/test_scrubbers.rb +0 -400
- data/test/integration/test_xml.rb +0 -55
- data/test/unit/test_api.rb +0 -142
- data/test/unit/test_encoding.rb +0 -20
- data/test/unit/test_helpers.rb +0 -62
- data/test/unit/test_scrubber.rb +0 -229
- data/test/unit/test_scrubbers.rb +0 -14
data/lib/loofah/scrubbers.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Loofah
|
2
4
|
#
|
3
5
|
# Loofah provides some built-in scrubbers for sanitizing with
|
4
|
-
# HTML5lib's
|
6
|
+
# HTML5lib's safelist and for accomplishing some common
|
5
7
|
# transformation tasks.
|
6
8
|
#
|
7
9
|
#
|
@@ -10,7 +12,7 @@ module Loofah
|
|
10
12
|
# +:strip+ removes unknown/unsafe tags, but leaves behind the pristine contents:
|
11
13
|
#
|
12
14
|
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
13
|
-
# Loofah.
|
15
|
+
# Loofah.html5_fragment(unsafe_html).scrub!(:strip)
|
14
16
|
# => "ohai! <div>div is safe</div> but foo is <b>not</b>"
|
15
17
|
#
|
16
18
|
#
|
@@ -19,7 +21,7 @@ module Loofah
|
|
19
21
|
# +:prune+ removes unknown/unsafe tags and their contents (including their subtrees):
|
20
22
|
#
|
21
23
|
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
22
|
-
# Loofah.
|
24
|
+
# Loofah.html5_fragment(unsafe_html).scrub!(:prune)
|
23
25
|
# => "ohai! <div>div is safe</div> "
|
24
26
|
#
|
25
27
|
#
|
@@ -28,7 +30,7 @@ module Loofah
|
|
28
30
|
# +:escape+ performs HTML entity escaping on the unknown/unsafe tags:
|
29
31
|
#
|
30
32
|
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
31
|
-
# Loofah.
|
33
|
+
# Loofah.html5_fragment(unsafe_html).scrub!(:escape)
|
32
34
|
# => "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
33
35
|
#
|
34
36
|
#
|
@@ -40,7 +42,7 @@ module Loofah
|
|
40
42
|
# layer of paint on top of the HTML input to make it look nice.
|
41
43
|
#
|
42
44
|
# messy_markup = "ohai! <div id='foo' class='bar' style='margin: 10px'>div with attributes</div>"
|
43
|
-
# Loofah.
|
45
|
+
# Loofah.html5_fragment(messy_markup).scrub!(:whitewash)
|
44
46
|
# => "ohai! <div>div with attributes</div>"
|
45
47
|
#
|
46
48
|
# One use case for this scrubber is to clean up HTML that was
|
@@ -55,7 +57,7 @@ module Loofah
|
|
55
57
|
# +:nofollow+ adds a rel="nofollow" attribute to all links
|
56
58
|
#
|
57
59
|
# link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
|
58
|
-
# Loofah.
|
60
|
+
# Loofah.html5_fragment(link_farmers_markup).scrub!(:nofollow)
|
59
61
|
# => "ohai! <a href='http://www.myswarmysite.com/' rel="nofollow">I like your blog post</a>"
|
60
62
|
#
|
61
63
|
#
|
@@ -64,7 +66,7 @@ module Loofah
|
|
64
66
|
# +:noopener+ adds a rel="noopener" attribute to all links
|
65
67
|
#
|
66
68
|
# link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
|
67
|
-
# Loofah.
|
69
|
+
# Loofah.html5_fragment(link_farmers_markup).scrub!(:noopener)
|
68
70
|
# => "ohai! <a href='http://www.myswarmysite.com/' rel="noopener">I like your blog post</a>"
|
69
71
|
#
|
70
72
|
#
|
@@ -73,7 +75,7 @@ module Loofah
|
|
73
75
|
# +:unprintable+ removes unprintable Unicode characters.
|
74
76
|
#
|
75
77
|
# markup = "<p>Some text with an unprintable character at the end\u2028</p>"
|
76
|
-
# Loofah.
|
78
|
+
# Loofah.html5_fragment(markup).scrub!(:unprintable)
|
77
79
|
# => "<p>Some text with an unprintable character at the end</p>"
|
78
80
|
#
|
79
81
|
# You may not be able to see the unprintable character in the above example, but there is a
|
@@ -89,23 +91,20 @@ module Loofah
|
|
89
91
|
# +:strip+ removes unknown/unsafe tags, but leaves behind the pristine contents:
|
90
92
|
#
|
91
93
|
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
92
|
-
# Loofah.
|
94
|
+
# Loofah.html5_fragment(unsafe_html).scrub!(:strip)
|
93
95
|
# => "ohai! <div>div is safe</div> but foo is <b>not</b>"
|
94
96
|
#
|
95
97
|
class Strip < Scrubber
|
96
|
-
def initialize
|
98
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
97
99
|
@direction = :bottom_up
|
98
100
|
end
|
99
101
|
|
100
102
|
def scrub(node)
|
101
103
|
return CONTINUE if html5lib_sanitize(node) == CONTINUE
|
102
|
-
|
103
|
-
|
104
|
-
node.before Nokogiri::XML::Text.new(sanitized_text, node.document)
|
105
|
-
else
|
106
|
-
node.before node.children
|
107
|
-
end
|
104
|
+
|
105
|
+
node.before(node.children)
|
108
106
|
node.remove
|
107
|
+
STOP
|
109
108
|
end
|
110
109
|
end
|
111
110
|
|
@@ -115,18 +114,19 @@ module Loofah
|
|
115
114
|
# +:prune+ removes unknown/unsafe tags and their contents (including their subtrees):
|
116
115
|
#
|
117
116
|
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
118
|
-
# Loofah.
|
117
|
+
# Loofah.html5_fragment(unsafe_html).scrub!(:prune)
|
119
118
|
# => "ohai! <div>div is safe</div> "
|
120
119
|
#
|
121
120
|
class Prune < Scrubber
|
122
|
-
def initialize
|
121
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
123
122
|
@direction = :top_down
|
124
123
|
end
|
125
124
|
|
126
125
|
def scrub(node)
|
127
126
|
return CONTINUE if html5lib_sanitize(node) == CONTINUE
|
127
|
+
|
128
128
|
node.remove
|
129
|
-
|
129
|
+
STOP
|
130
130
|
end
|
131
131
|
end
|
132
132
|
|
@@ -136,19 +136,20 @@ module Loofah
|
|
136
136
|
# +:escape+ performs HTML entity escaping on the unknown/unsafe tags:
|
137
137
|
#
|
138
138
|
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
139
|
-
# Loofah.
|
139
|
+
# Loofah.html5_fragment(unsafe_html).scrub!(:escape)
|
140
140
|
# => "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
141
141
|
#
|
142
142
|
class Escape < Scrubber
|
143
|
-
def initialize
|
143
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
144
144
|
@direction = :top_down
|
145
145
|
end
|
146
146
|
|
147
147
|
def scrub(node)
|
148
148
|
return CONTINUE if html5lib_sanitize(node) == CONTINUE
|
149
|
-
|
149
|
+
|
150
|
+
node.add_next_sibling(Nokogiri::XML::Text.new(node.to_s, node.document))
|
150
151
|
node.remove
|
151
|
-
|
152
|
+
STOP
|
152
153
|
end
|
153
154
|
end
|
154
155
|
|
@@ -161,7 +162,7 @@ module Loofah
|
|
161
162
|
# layer of paint on top of the HTML input to make it look nice.
|
162
163
|
#
|
163
164
|
# messy_markup = "ohai! <div id='foo' class='bar' style='margin: 10px'>div with attributes</div>"
|
164
|
-
# Loofah.
|
165
|
+
# Loofah.html5_fragment(messy_markup).scrub!(:whitewash)
|
165
166
|
# => "ohai! <div>div with attributes</div>"
|
166
167
|
#
|
167
168
|
# One use case for this scrubber is to clean up HTML that was
|
@@ -171,14 +172,14 @@ module Loofah
|
|
171
172
|
# Certainly not me.
|
172
173
|
#
|
173
174
|
class Whitewash < Scrubber
|
174
|
-
def initialize
|
175
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
175
176
|
@direction = :top_down
|
176
177
|
end
|
177
178
|
|
178
179
|
def scrub(node)
|
179
180
|
case node.type
|
180
181
|
when Nokogiri::XML::Node::ELEMENT_NODE
|
181
|
-
if HTML5::Scrub.allowed_element?
|
182
|
+
if HTML5::Scrub.allowed_element?(node.name)
|
182
183
|
node.attributes.each { |attr| node.remove_attribute(attr.first) }
|
183
184
|
return CONTINUE if node.namespaces.empty?
|
184
185
|
end
|
@@ -196,18 +197,19 @@ module Loofah
|
|
196
197
|
# +:nofollow+ adds a rel="nofollow" attribute to all links
|
197
198
|
#
|
198
199
|
# link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
|
199
|
-
# Loofah.
|
200
|
+
# Loofah.html5_fragment(link_farmers_markup).scrub!(:nofollow)
|
200
201
|
# => "ohai! <a href='http://www.myswarmysite.com/' rel="nofollow">I like your blog post</a>"
|
201
202
|
#
|
202
203
|
class NoFollow < Scrubber
|
203
|
-
def initialize
|
204
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
204
205
|
@direction = :top_down
|
205
206
|
end
|
206
207
|
|
207
208
|
def scrub(node)
|
208
|
-
return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name ==
|
209
|
-
|
210
|
-
|
209
|
+
return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
|
210
|
+
|
211
|
+
append_attribute(node, "rel", "nofollow")
|
212
|
+
STOP
|
211
213
|
end
|
212
214
|
end
|
213
215
|
|
@@ -217,30 +219,37 @@ module Loofah
|
|
217
219
|
# +:noopener+ adds a rel="noopener" attribute to all links
|
218
220
|
#
|
219
221
|
# link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
|
220
|
-
# Loofah.
|
222
|
+
# Loofah.html5_fragment(link_farmers_markup).scrub!(:noopener)
|
221
223
|
# => "ohai! <a href='http://www.myswarmysite.com/' rel="noopener">I like your blog post</a>"
|
222
224
|
#
|
223
225
|
class NoOpener < Scrubber
|
224
|
-
def initialize
|
226
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
225
227
|
@direction = :top_down
|
226
228
|
end
|
227
229
|
|
228
230
|
def scrub(node)
|
229
|
-
return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name ==
|
230
|
-
|
231
|
-
|
231
|
+
return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
|
232
|
+
|
233
|
+
append_attribute(node, "rel", "noopener")
|
234
|
+
STOP
|
232
235
|
end
|
233
236
|
end
|
234
237
|
|
235
238
|
# This class probably isn't useful publicly, but is used for #to_text's current implementation
|
236
239
|
class NewlineBlockElements < Scrubber # :nodoc:
|
237
|
-
def initialize
|
240
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
238
241
|
@direction = :bottom_up
|
239
242
|
end
|
240
243
|
|
241
244
|
def scrub(node)
|
242
|
-
return CONTINUE unless Loofah::Elements::
|
243
|
-
|
245
|
+
return CONTINUE unless Loofah::Elements::LINEBREAKERS.include?(node.name)
|
246
|
+
|
247
|
+
replacement = if Loofah::Elements::INLINE_LINE_BREAK.include?(node.name)
|
248
|
+
"\n"
|
249
|
+
else
|
250
|
+
"\n#{node.content}\n"
|
251
|
+
end
|
252
|
+
node.add_next_sibling(Nokogiri::XML::Text.new(replacement, node.document))
|
244
253
|
node.remove
|
245
254
|
end
|
246
255
|
end
|
@@ -251,7 +260,7 @@ module Loofah
|
|
251
260
|
# +:unprintable+ removes unprintable Unicode characters.
|
252
261
|
#
|
253
262
|
# markup = "<p>Some text with an unprintable character at the end\u2028</p>"
|
254
|
-
# Loofah.
|
263
|
+
# Loofah.html5_fragment(markup).scrub!(:unprintable)
|
255
264
|
# => "<p>Some text with an unprintable character at the end</p>"
|
256
265
|
#
|
257
266
|
# You may not be able to see the unprintable character in the above example, but there is a
|
@@ -261,13 +270,13 @@ module Loofah
|
|
261
270
|
# http://timelessrepo.com/json-isnt-a-javascript-subset
|
262
271
|
#
|
263
272
|
class Unprintable < Scrubber
|
264
|
-
def initialize
|
273
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
265
274
|
@direction = :top_down
|
266
275
|
end
|
267
276
|
|
268
277
|
def scrub(node)
|
269
278
|
if node.type == Nokogiri::XML::Node::TEXT_NODE || node.type == Nokogiri::XML::Node::CDATA_SECTION_NODE
|
270
|
-
node.content = node.content.gsub(/\u2028|\u2029/,
|
279
|
+
node.content = node.content.gsub(/\u2028|\u2029/, "")
|
271
280
|
end
|
272
281
|
CONTINUE
|
273
282
|
end
|
@@ -277,21 +286,23 @@ module Loofah
|
|
277
286
|
# A hash that maps a symbol (like +:prune+) to the appropriate Scrubber (Loofah::Scrubbers::Prune).
|
278
287
|
#
|
279
288
|
MAP = {
|
280
|
-
:
|
281
|
-
:
|
282
|
-
:
|
283
|
-
:
|
284
|
-
:
|
285
|
-
:
|
286
|
-
:
|
287
|
-
:
|
289
|
+
escape: Escape,
|
290
|
+
prune: Prune,
|
291
|
+
whitewash: Whitewash,
|
292
|
+
strip: Strip,
|
293
|
+
nofollow: NoFollow,
|
294
|
+
noopener: NoOpener,
|
295
|
+
newline_block_elements: NewlineBlockElements,
|
296
|
+
unprintable: Unprintable,
|
288
297
|
}
|
289
298
|
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
299
|
+
class << self
|
300
|
+
#
|
301
|
+
# Returns an array of symbols representing the built-in scrubbers
|
302
|
+
#
|
303
|
+
def scrubber_symbols
|
304
|
+
MAP.keys
|
305
|
+
end
|
295
306
|
end
|
296
307
|
end
|
297
308
|
end
|
data/lib/loofah/xml/document.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Loofah
|
2
4
|
module XML # :nodoc:
|
3
5
|
#
|
@@ -7,15 +9,10 @@ module Loofah
|
|
7
9
|
#
|
8
10
|
class DocumentFragment < Nokogiri::XML::DocumentFragment
|
9
11
|
class << self
|
10
|
-
|
11
|
-
# Overridden Nokogiri::XML::DocumentFragment
|
12
|
-
# constructor. Applications should use Loofah.fragment to
|
13
|
-
# parse a fragment.
|
14
|
-
#
|
15
|
-
def parse tags
|
12
|
+
def parse(tags)
|
16
13
|
doc = Loofah::XML::Document.new
|
17
14
|
doc.encoding = tags.encoding.name if tags.respond_to?(:encoding)
|
18
|
-
|
15
|
+
new(doc, tags)
|
19
16
|
end
|
20
17
|
end
|
21
18
|
end
|
data/lib/loofah.rb
CHANGED
@@ -1,65 +1,158 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
3
|
+
require "nokogiri"
|
4
4
|
|
5
|
-
|
6
|
-
|
5
|
+
module Loofah
|
6
|
+
class << self
|
7
|
+
def html5_support?
|
8
|
+
# Note that Loofah can only support HTML5 in Nokogiri >= 1.14.0 because it requires the
|
9
|
+
# subclassing fix from https://github.com/sparklemotion/nokogiri/pull/2534
|
10
|
+
unless @html5_support_set
|
11
|
+
@html5_support = (
|
12
|
+
Gem::Version.new(Nokogiri::VERSION) > Gem::Version.new("1.14.0") &&
|
13
|
+
Nokogiri.uses_gumbo?
|
14
|
+
)
|
15
|
+
@html5_support_set = true
|
16
|
+
end
|
17
|
+
@html5_support
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
require_relative "loofah/version"
|
23
|
+
require_relative "loofah/metahelpers"
|
24
|
+
require_relative "loofah/elements"
|
7
25
|
|
8
|
-
|
9
|
-
|
10
|
-
|
26
|
+
require_relative "loofah/html5/safelist"
|
27
|
+
require_relative "loofah/html5/libxml2_workarounds"
|
28
|
+
require_relative "loofah/html5/scrub"
|
11
29
|
|
12
|
-
|
13
|
-
|
30
|
+
require_relative "loofah/scrubber"
|
31
|
+
require_relative "loofah/scrubbers"
|
14
32
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
33
|
+
require_relative "loofah/concerns"
|
34
|
+
require_relative "loofah/xml/document"
|
35
|
+
require_relative "loofah/xml/document_fragment"
|
36
|
+
require_relative "loofah/html4/document"
|
37
|
+
require_relative "loofah/html4/document_fragment"
|
38
|
+
|
39
|
+
if Loofah.html5_support?
|
40
|
+
require_relative "loofah/html5/document"
|
41
|
+
require_relative "loofah/html5/document_fragment"
|
42
|
+
end
|
20
43
|
|
21
44
|
# == Strings and IO Objects as Input
|
22
45
|
#
|
23
|
-
#
|
24
|
-
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#
|
46
|
+
# The following methods accept any IO object in addition to accepting a string:
|
47
|
+
#
|
48
|
+
# - Loofah.html4_document
|
49
|
+
# - Loofah.html4_fragment
|
50
|
+
# - Loofah.scrub_html4_document
|
51
|
+
# - Loofah.scrub_html4_fragment
|
52
|
+
#
|
53
|
+
# - Loofah.html5_document
|
54
|
+
# - Loofah.html5_fragment
|
55
|
+
# - Loofah.scrub_html5_document
|
56
|
+
# - Loofah.scrub_html5_fragment
|
57
|
+
#
|
58
|
+
# - Loofah.xml_document
|
59
|
+
# - Loofah.xml_fragment
|
60
|
+
# - Loofah.scrub_xml_document
|
61
|
+
# - Loofah.scrub_xml_fragment
|
62
|
+
#
|
63
|
+
# - Loofah.document
|
64
|
+
# - Loofah.fragment
|
65
|
+
# - Loofah.scrub_document
|
66
|
+
# - Loofah.scrub_fragment
|
67
|
+
#
|
68
|
+
# That IO object could be a file, or a socket, or a StringIO, or anything that responds to +read+
|
69
|
+
# and +close+.
|
28
70
|
#
|
29
71
|
module Loofah
|
30
|
-
#
|
31
|
-
|
72
|
+
# Alias for Loofah::HTML4
|
73
|
+
HTML = HTML4
|
32
74
|
|
33
75
|
class << self
|
34
|
-
# Shortcut for Loofah::
|
35
|
-
#
|
36
|
-
|
37
|
-
|
76
|
+
# Shortcut for Loofah::HTML4::Document.parse(*args, &block)
|
77
|
+
#
|
78
|
+
# This method accepts the same parameters as Nokogiri::HTML4::Document.parse
|
79
|
+
def html4_document(*args, &block)
|
80
|
+
Loofah::HTML4::Document.parse(*args, &block)
|
38
81
|
end
|
39
82
|
|
40
|
-
# Shortcut for Loofah::
|
41
|
-
#
|
42
|
-
|
43
|
-
|
83
|
+
# Shortcut for Loofah::HTML4::DocumentFragment.parse(*args, &block)
|
84
|
+
#
|
85
|
+
# This method accepts the same parameters as Nokogiri::HTML4::DocumentFragment.parse
|
86
|
+
def html4_fragment(*args, &block)
|
87
|
+
Loofah::HTML4::DocumentFragment.parse(*args, &block)
|
44
88
|
end
|
45
89
|
|
46
|
-
# Shortcut for Loofah.
|
47
|
-
def
|
48
|
-
Loofah.
|
90
|
+
# Shortcut for Loofah::HTML4::Document.parse(string_or_io).scrub!(method)
|
91
|
+
def scrub_html4_document(string_or_io, method)
|
92
|
+
Loofah::HTML4::Document.parse(string_or_io).scrub!(method)
|
49
93
|
end
|
50
94
|
|
51
|
-
# Shortcut for Loofah.
|
52
|
-
def
|
53
|
-
Loofah.
|
95
|
+
# Shortcut for Loofah::HTML4::DocumentFragment.parse(string_or_io).scrub!(method)
|
96
|
+
def scrub_html4_fragment(string_or_io, method)
|
97
|
+
Loofah::HTML4::DocumentFragment.parse(string_or_io).scrub!(method)
|
54
98
|
end
|
55
99
|
|
56
|
-
|
100
|
+
if Loofah.html5_support?
|
101
|
+
# Shortcut for Loofah::HTML5::Document.parse(*args, &block)
|
102
|
+
#
|
103
|
+
# This method accepts the same parameters as Nokogiri::HTML5::Document.parse
|
104
|
+
def html5_document(*args, &block)
|
105
|
+
Loofah::HTML5::Document.parse(*args, &block)
|
106
|
+
end
|
107
|
+
|
108
|
+
# Shortcut for Loofah::HTML5::DocumentFragment.parse(*args, &block)
|
109
|
+
#
|
110
|
+
# This method accepts the same parameters as Nokogiri::HTML5::DocumentFragment.parse
|
111
|
+
def html5_fragment(*args, &block)
|
112
|
+
Loofah::HTML5::DocumentFragment.parse(*args, &block)
|
113
|
+
end
|
114
|
+
|
115
|
+
# Shortcut for Loofah::HTML5::Document.parse(string_or_io).scrub!(method)
|
116
|
+
def scrub_html5_document(string_or_io, method)
|
117
|
+
Loofah::HTML5::Document.parse(string_or_io).scrub!(method)
|
118
|
+
end
|
119
|
+
|
120
|
+
# Shortcut for Loofah::HTML5::DocumentFragment.parse(string_or_io).scrub!(method)
|
121
|
+
def scrub_html5_fragment(string_or_io, method)
|
122
|
+
Loofah::HTML5::DocumentFragment.parse(string_or_io).scrub!(method)
|
123
|
+
end
|
124
|
+
else
|
125
|
+
def html5_document(*args, &block)
|
126
|
+
raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
|
127
|
+
end
|
128
|
+
|
129
|
+
def html5_fragment(*args, &block)
|
130
|
+
raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
|
131
|
+
end
|
132
|
+
|
133
|
+
def scrub_html5_document(string_or_io, method)
|
134
|
+
raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
|
135
|
+
end
|
136
|
+
|
137
|
+
def scrub_html5_fragment(string_or_io, method)
|
138
|
+
raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
alias_method :document, :html4_document
|
143
|
+
alias_method :fragment, :html4_fragment
|
144
|
+
alias_method :scrub_document, :scrub_html4_document
|
145
|
+
alias_method :scrub_fragment, :scrub_html4_fragment
|
146
|
+
|
147
|
+
# Shortcut for Loofah::XML::Document.parse(*args, &block)
|
148
|
+
#
|
57
149
|
# This method accepts the same parameters as Nokogiri::XML::Document.parse
|
58
150
|
def xml_document(*args, &block)
|
59
151
|
Loofah::XML::Document.parse(*args, &block)
|
60
152
|
end
|
61
153
|
|
62
|
-
# Shortcut for Loofah::XML::DocumentFragment.parse
|
154
|
+
# Shortcut for Loofah::XML::DocumentFragment.parse(*args, &block)
|
155
|
+
#
|
63
156
|
# This method accepts the same parameters as Nokogiri::XML::DocumentFragment.parse
|
64
157
|
def xml_fragment(*args, &block)
|
65
158
|
Loofah::XML::DocumentFragment.parse(*args, &block)
|
@@ -77,7 +170,7 @@ module Loofah
|
|
77
170
|
|
78
171
|
# A helper to remove extraneous whitespace from text-ified HTML
|
79
172
|
def remove_extraneous_whitespace(string)
|
80
|
-
string.gsub(/\n\s*\n\s*\n/,"\n\n")
|
173
|
+
string.gsub(/\n\s*\n\s*\n/, "\n\n")
|
81
174
|
end
|
82
175
|
end
|
83
176
|
end
|