loofah 2.2.3 → 2.21.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +269 -31
  3. data/README.md +109 -124
  4. data/lib/loofah/concerns.rb +207 -0
  5. data/lib/loofah/elements.rb +85 -79
  6. data/lib/loofah/helpers.rb +37 -20
  7. data/lib/loofah/{html → html4}/document.rb +6 -7
  8. data/lib/loofah/html4/document_fragment.rb +15 -0
  9. data/lib/loofah/html5/document.rb +17 -0
  10. data/lib/loofah/html5/document_fragment.rb +15 -0
  11. data/lib/loofah/html5/libxml2_workarounds.rb +10 -8
  12. data/lib/loofah/html5/safelist.rb +1055 -0
  13. data/lib/loofah/html5/scrub.rb +153 -58
  14. data/lib/loofah/metahelpers.rb +11 -6
  15. data/lib/loofah/scrubber.rb +22 -15
  16. data/lib/loofah/scrubbers.rb +66 -55
  17. data/lib/loofah/version.rb +6 -0
  18. data/lib/loofah/xml/document.rb +2 -0
  19. data/lib/loofah/xml/document_fragment.rb +4 -7
  20. data/lib/loofah.rb +131 -38
  21. metadata +28 -216
  22. data/.gemtest +0 -0
  23. data/Gemfile +0 -22
  24. data/Manifest.txt +0 -40
  25. data/Rakefile +0 -79
  26. data/benchmark/benchmark.rb +0 -149
  27. data/benchmark/fragment.html +0 -96
  28. data/benchmark/helper.rb +0 -73
  29. data/benchmark/www.slashdot.com.html +0 -2560
  30. data/lib/loofah/html/document_fragment.rb +0 -40
  31. data/lib/loofah/html5/whitelist.rb +0 -186
  32. data/lib/loofah/instance_methods.rb +0 -127
  33. data/test/assets/msword.html +0 -63
  34. data/test/assets/testdata_sanitizer_tests1.dat +0 -502
  35. data/test/helper.rb +0 -18
  36. data/test/html5/test_sanitizer.rb +0 -382
  37. data/test/integration/test_ad_hoc.rb +0 -204
  38. data/test/integration/test_helpers.rb +0 -43
  39. data/test/integration/test_html.rb +0 -72
  40. data/test/integration/test_scrubbers.rb +0 -400
  41. data/test/integration/test_xml.rb +0 -55
  42. data/test/unit/test_api.rb +0 -142
  43. data/test/unit/test_encoding.rb +0 -20
  44. data/test/unit/test_helpers.rb +0 -62
  45. data/test/unit/test_scrubber.rb +0 -229
  46. data/test/unit/test_scrubbers.rb +0 -14
@@ -1,7 +1,9 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Loofah
2
4
  #
3
5
  # Loofah provides some built-in scrubbers for sanitizing with
4
- # HTML5lib's whitelist and for accomplishing some common
6
+ # HTML5lib's safelist and for accomplishing some common
5
7
  # transformation tasks.
6
8
  #
7
9
  #
@@ -10,7 +12,7 @@ module Loofah
10
12
  # +:strip+ removes unknown/unsafe tags, but leaves behind the pristine contents:
11
13
  #
12
14
  # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
13
- # Loofah.fragment(unsafe_html).scrub!(:strip)
15
+ # Loofah.html5_fragment(unsafe_html).scrub!(:strip)
14
16
  # => "ohai! <div>div is safe</div> but foo is <b>not</b>"
15
17
  #
16
18
  #
@@ -19,7 +21,7 @@ module Loofah
19
21
  # +:prune+ removes unknown/unsafe tags and their contents (including their subtrees):
20
22
  #
21
23
  # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
22
- # Loofah.fragment(unsafe_html).scrub!(:prune)
24
+ # Loofah.html5_fragment(unsafe_html).scrub!(:prune)
23
25
  # => "ohai! <div>div is safe</div> "
24
26
  #
25
27
  #
@@ -28,7 +30,7 @@ module Loofah
28
30
  # +:escape+ performs HTML entity escaping on the unknown/unsafe tags:
29
31
  #
30
32
  # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
31
- # Loofah.fragment(unsafe_html).scrub!(:escape)
33
+ # Loofah.html5_fragment(unsafe_html).scrub!(:escape)
32
34
  # => "ohai! <div>div is safe</div> &lt;foo&gt;but foo is &lt;b&gt;not&lt;/b&gt;&lt;/foo&gt;"
33
35
  #
34
36
  #
@@ -40,7 +42,7 @@ module Loofah
40
42
  # layer of paint on top of the HTML input to make it look nice.
41
43
  #
42
44
  # messy_markup = "ohai! <div id='foo' class='bar' style='margin: 10px'>div with attributes</div>"
43
- # Loofah.fragment(messy_markup).scrub!(:whitewash)
45
+ # Loofah.html5_fragment(messy_markup).scrub!(:whitewash)
44
46
  # => "ohai! <div>div with attributes</div>"
45
47
  #
46
48
  # One use case for this scrubber is to clean up HTML that was
@@ -55,7 +57,7 @@ module Loofah
55
57
  # +:nofollow+ adds a rel="nofollow" attribute to all links
56
58
  #
57
59
  # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
58
- # Loofah.fragment(link_farmers_markup).scrub!(:nofollow)
60
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:nofollow)
59
61
  # => "ohai! <a href='http://www.myswarmysite.com/' rel="nofollow">I like your blog post</a>"
60
62
  #
61
63
  #
@@ -64,7 +66,7 @@ module Loofah
64
66
  # +:noopener+ adds a rel="noopener" attribute to all links
65
67
  #
66
68
  # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
67
- # Loofah.fragment(link_farmers_markup).scrub!(:noopener)
69
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:noopener)
68
70
  # => "ohai! <a href='http://www.myswarmysite.com/' rel="noopener">I like your blog post</a>"
69
71
  #
70
72
  #
@@ -73,7 +75,7 @@ module Loofah
73
75
  # +:unprintable+ removes unprintable Unicode characters.
74
76
  #
75
77
  # markup = "<p>Some text with an unprintable character at the end\u2028</p>"
76
- # Loofah.fragment(markup).scrub!(:unprintable)
78
+ # Loofah.html5_fragment(markup).scrub!(:unprintable)
77
79
  # => "<p>Some text with an unprintable character at the end</p>"
78
80
  #
79
81
  # You may not be able to see the unprintable character in the above example, but there is a
@@ -89,23 +91,20 @@ module Loofah
89
91
  # +:strip+ removes unknown/unsafe tags, but leaves behind the pristine contents:
90
92
  #
91
93
  # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
92
- # Loofah.fragment(unsafe_html).scrub!(:strip)
94
+ # Loofah.html5_fragment(unsafe_html).scrub!(:strip)
93
95
  # => "ohai! <div>div is safe</div> but foo is <b>not</b>"
94
96
  #
95
97
  class Strip < Scrubber
96
- def initialize
98
+ def initialize # rubocop:disable Lint/MissingSuper
97
99
  @direction = :bottom_up
98
100
  end
99
101
 
100
102
  def scrub(node)
101
103
  return CONTINUE if html5lib_sanitize(node) == CONTINUE
102
- if node.children.length == 1 && node.children.first.cdata?
103
- sanitized_text = Loofah.fragment(node.children.first.to_html).scrub!(:strip).to_html
104
- node.before Nokogiri::XML::Text.new(sanitized_text, node.document)
105
- else
106
- node.before node.children
107
- end
104
+
105
+ node.before(node.children)
108
106
  node.remove
107
+ STOP
109
108
  end
110
109
  end
111
110
 
@@ -115,18 +114,19 @@ module Loofah
115
114
  # +:prune+ removes unknown/unsafe tags and their contents (including their subtrees):
116
115
  #
117
116
  # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
118
- # Loofah.fragment(unsafe_html).scrub!(:prune)
117
+ # Loofah.html5_fragment(unsafe_html).scrub!(:prune)
119
118
  # => "ohai! <div>div is safe</div> "
120
119
  #
121
120
  class Prune < Scrubber
122
- def initialize
121
+ def initialize # rubocop:disable Lint/MissingSuper
123
122
  @direction = :top_down
124
123
  end
125
124
 
126
125
  def scrub(node)
127
126
  return CONTINUE if html5lib_sanitize(node) == CONTINUE
127
+
128
128
  node.remove
129
- return STOP
129
+ STOP
130
130
  end
131
131
  end
132
132
 
@@ -136,19 +136,20 @@ module Loofah
136
136
  # +:escape+ performs HTML entity escaping on the unknown/unsafe tags:
137
137
  #
138
138
  # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
139
- # Loofah.fragment(unsafe_html).scrub!(:escape)
139
+ # Loofah.html5_fragment(unsafe_html).scrub!(:escape)
140
140
  # => "ohai! <div>div is safe</div> &lt;foo&gt;but foo is &lt;b&gt;not&lt;/b&gt;&lt;/foo&gt;"
141
141
  #
142
142
  class Escape < Scrubber
143
- def initialize
143
+ def initialize # rubocop:disable Lint/MissingSuper
144
144
  @direction = :top_down
145
145
  end
146
146
 
147
147
  def scrub(node)
148
148
  return CONTINUE if html5lib_sanitize(node) == CONTINUE
149
- node.add_next_sibling Nokogiri::XML::Text.new(node.to_s, node.document)
149
+
150
+ node.add_next_sibling(Nokogiri::XML::Text.new(node.to_s, node.document))
150
151
  node.remove
151
- return STOP
152
+ STOP
152
153
  end
153
154
  end
154
155
 
@@ -161,7 +162,7 @@ module Loofah
161
162
  # layer of paint on top of the HTML input to make it look nice.
162
163
  #
163
164
  # messy_markup = "ohai! <div id='foo' class='bar' style='margin: 10px'>div with attributes</div>"
164
- # Loofah.fragment(messy_markup).scrub!(:whitewash)
165
+ # Loofah.html5_fragment(messy_markup).scrub!(:whitewash)
165
166
  # => "ohai! <div>div with attributes</div>"
166
167
  #
167
168
  # One use case for this scrubber is to clean up HTML that was
@@ -171,14 +172,14 @@ module Loofah
171
172
  # Certainly not me.
172
173
  #
173
174
  class Whitewash < Scrubber
174
- def initialize
175
+ def initialize # rubocop:disable Lint/MissingSuper
175
176
  @direction = :top_down
176
177
  end
177
178
 
178
179
  def scrub(node)
179
180
  case node.type
180
181
  when Nokogiri::XML::Node::ELEMENT_NODE
181
- if HTML5::Scrub.allowed_element? node.name
182
+ if HTML5::Scrub.allowed_element?(node.name)
182
183
  node.attributes.each { |attr| node.remove_attribute(attr.first) }
183
184
  return CONTINUE if node.namespaces.empty?
184
185
  end
@@ -196,18 +197,19 @@ module Loofah
196
197
  # +:nofollow+ adds a rel="nofollow" attribute to all links
197
198
  #
198
199
  # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
199
- # Loofah.fragment(link_farmers_markup).scrub!(:nofollow)
200
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:nofollow)
200
201
  # => "ohai! <a href='http://www.myswarmysite.com/' rel="nofollow">I like your blog post</a>"
201
202
  #
202
203
  class NoFollow < Scrubber
203
- def initialize
204
+ def initialize # rubocop:disable Lint/MissingSuper
204
205
  @direction = :top_down
205
206
  end
206
207
 
207
208
  def scrub(node)
208
- return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == 'a')
209
- append_attribute(node, 'rel', 'nofollow')
210
- return STOP
209
+ return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
210
+
211
+ append_attribute(node, "rel", "nofollow")
212
+ STOP
211
213
  end
212
214
  end
213
215
 
@@ -217,30 +219,37 @@ module Loofah
217
219
  # +:noopener+ adds a rel="noopener" attribute to all links
218
220
  #
219
221
  # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
220
- # Loofah.fragment(link_farmers_markup).scrub!(:noopener)
222
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:noopener)
221
223
  # => "ohai! <a href='http://www.myswarmysite.com/' rel="noopener">I like your blog post</a>"
222
224
  #
223
225
  class NoOpener < Scrubber
224
- def initialize
226
+ def initialize # rubocop:disable Lint/MissingSuper
225
227
  @direction = :top_down
226
228
  end
227
229
 
228
230
  def scrub(node)
229
- return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == 'a')
230
- append_attribute(node, 'rel', 'noopener')
231
- return STOP
231
+ return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
232
+
233
+ append_attribute(node, "rel", "noopener")
234
+ STOP
232
235
  end
233
236
  end
234
237
 
235
238
  # This class probably isn't useful publicly, but is used for #to_text's current implementation
236
239
  class NewlineBlockElements < Scrubber # :nodoc:
237
- def initialize
240
+ def initialize # rubocop:disable Lint/MissingSuper
238
241
  @direction = :bottom_up
239
242
  end
240
243
 
241
244
  def scrub(node)
242
- return CONTINUE unless Loofah::Elements::BLOCK_LEVEL.include?(node.name)
243
- node.add_next_sibling Nokogiri::XML::Text.new("\n#{node.content}\n", node.document)
245
+ return CONTINUE unless Loofah::Elements::LINEBREAKERS.include?(node.name)
246
+
247
+ replacement = if Loofah::Elements::INLINE_LINE_BREAK.include?(node.name)
248
+ "\n"
249
+ else
250
+ "\n#{node.content}\n"
251
+ end
252
+ node.add_next_sibling(Nokogiri::XML::Text.new(replacement, node.document))
244
253
  node.remove
245
254
  end
246
255
  end
@@ -251,7 +260,7 @@ module Loofah
251
260
  # +:unprintable+ removes unprintable Unicode characters.
252
261
  #
253
262
  # markup = "<p>Some text with an unprintable character at the end\u2028</p>"
254
- # Loofah.fragment(markup).scrub!(:unprintable)
263
+ # Loofah.html5_fragment(markup).scrub!(:unprintable)
255
264
  # => "<p>Some text with an unprintable character at the end</p>"
256
265
  #
257
266
  # You may not be able to see the unprintable character in the above example, but there is a
@@ -261,13 +270,13 @@ module Loofah
261
270
  # http://timelessrepo.com/json-isnt-a-javascript-subset
262
271
  #
263
272
  class Unprintable < Scrubber
264
- def initialize
273
+ def initialize # rubocop:disable Lint/MissingSuper
265
274
  @direction = :top_down
266
275
  end
267
276
 
268
277
  def scrub(node)
269
278
  if node.type == Nokogiri::XML::Node::TEXT_NODE || node.type == Nokogiri::XML::Node::CDATA_SECTION_NODE
270
- node.content = node.content.gsub(/\u2028|\u2029/, '')
279
+ node.content = node.content.gsub(/\u2028|\u2029/, "")
271
280
  end
272
281
  CONTINUE
273
282
  end
@@ -277,21 +286,23 @@ module Loofah
277
286
  # A hash that maps a symbol (like +:prune+) to the appropriate Scrubber (Loofah::Scrubbers::Prune).
278
287
  #
279
288
  MAP = {
280
- :escape => Escape,
281
- :prune => Prune,
282
- :whitewash => Whitewash,
283
- :strip => Strip,
284
- :nofollow => NoFollow,
285
- :noopener => NoOpener,
286
- :newline_block_elements => NewlineBlockElements,
287
- :unprintable => Unprintable
289
+ escape: Escape,
290
+ prune: Prune,
291
+ whitewash: Whitewash,
292
+ strip: Strip,
293
+ nofollow: NoFollow,
294
+ noopener: NoOpener,
295
+ newline_block_elements: NewlineBlockElements,
296
+ unprintable: Unprintable,
288
297
  }
289
298
 
290
- #
291
- # Returns an array of symbols representing the built-in scrubbers
292
- #
293
- def self.scrubber_symbols
294
- MAP.keys
299
+ class << self
300
+ #
301
+ # Returns an array of symbols representing the built-in scrubbers
302
+ #
303
+ def scrubber_symbols
304
+ MAP.keys
305
+ end
295
306
  end
296
307
  end
297
308
  end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Loofah
4
+ # The version of Loofah you are using
5
+ VERSION = "2.21.1"
6
+ end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Loofah
2
4
  module XML # :nodoc:
3
5
  #
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Loofah
2
4
  module XML # :nodoc:
3
5
  #
@@ -7,15 +9,10 @@ module Loofah
7
9
  #
8
10
  class DocumentFragment < Nokogiri::XML::DocumentFragment
9
11
  class << self
10
- #
11
- # Overridden Nokogiri::XML::DocumentFragment
12
- # constructor. Applications should use Loofah.fragment to
13
- # parse a fragment.
14
- #
15
- def parse tags
12
+ def parse(tags)
16
13
  doc = Loofah::XML::Document.new
17
14
  doc.encoding = tags.encoding.name if tags.respond_to?(:encoding)
18
- self.new(doc, tags)
15
+ new(doc, tags)
19
16
  end
20
17
  end
21
18
  end
data/lib/loofah.rb CHANGED
@@ -1,65 +1,158 @@
1
- $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
1
+ # frozen_string_literal: true
2
2
 
3
- require 'nokogiri'
3
+ require "nokogiri"
4
4
 
5
- require 'loofah/metahelpers'
6
- require 'loofah/elements'
5
+ module Loofah
6
+ class << self
7
+ def html5_support?
8
+ # Note that Loofah can only support HTML5 in Nokogiri >= 1.14.0 because it requires the
9
+ # subclassing fix from https://github.com/sparklemotion/nokogiri/pull/2534
10
+ unless @html5_support_set
11
+ @html5_support = (
12
+ Gem::Version.new(Nokogiri::VERSION) > Gem::Version.new("1.14.0") &&
13
+ Nokogiri.uses_gumbo?
14
+ )
15
+ @html5_support_set = true
16
+ end
17
+ @html5_support
18
+ end
19
+ end
20
+ end
21
+
22
+ require_relative "loofah/version"
23
+ require_relative "loofah/metahelpers"
24
+ require_relative "loofah/elements"
7
25
 
8
- require 'loofah/html5/whitelist'
9
- require 'loofah/html5/libxml2_workarounds'
10
- require 'loofah/html5/scrub'
26
+ require_relative "loofah/html5/safelist"
27
+ require_relative "loofah/html5/libxml2_workarounds"
28
+ require_relative "loofah/html5/scrub"
11
29
 
12
- require 'loofah/scrubber'
13
- require 'loofah/scrubbers'
30
+ require_relative "loofah/scrubber"
31
+ require_relative "loofah/scrubbers"
14
32
 
15
- require 'loofah/instance_methods'
16
- require 'loofah/xml/document'
17
- require 'loofah/xml/document_fragment'
18
- require 'loofah/html/document'
19
- require 'loofah/html/document_fragment'
33
+ require_relative "loofah/concerns"
34
+ require_relative "loofah/xml/document"
35
+ require_relative "loofah/xml/document_fragment"
36
+ require_relative "loofah/html4/document"
37
+ require_relative "loofah/html4/document_fragment"
38
+
39
+ if Loofah.html5_support?
40
+ require_relative "loofah/html5/document"
41
+ require_relative "loofah/html5/document_fragment"
42
+ end
20
43
 
21
44
  # == Strings and IO Objects as Input
22
45
  #
23
- # Loofah.document and Loofah.fragment accept any IO object in addition
24
- # to accepting a string. That IO object could be a file, or a socket,
25
- # or a StringIO, or anything that responds to +read+ and
26
- # +close+. Which makes it particularly easy to sanitize mass
27
- # quantities of docs.
46
+ # The following methods accept any IO object in addition to accepting a string:
47
+ #
48
+ # - Loofah.html4_document
49
+ # - Loofah.html4_fragment
50
+ # - Loofah.scrub_html4_document
51
+ # - Loofah.scrub_html4_fragment
52
+ #
53
+ # - Loofah.html5_document
54
+ # - Loofah.html5_fragment
55
+ # - Loofah.scrub_html5_document
56
+ # - Loofah.scrub_html5_fragment
57
+ #
58
+ # - Loofah.xml_document
59
+ # - Loofah.xml_fragment
60
+ # - Loofah.scrub_xml_document
61
+ # - Loofah.scrub_xml_fragment
62
+ #
63
+ # - Loofah.document
64
+ # - Loofah.fragment
65
+ # - Loofah.scrub_document
66
+ # - Loofah.scrub_fragment
67
+ #
68
+ # That IO object could be a file, or a socket, or a StringIO, or anything that responds to +read+
69
+ # and +close+.
28
70
  #
29
71
  module Loofah
30
- # The version of Loofah you are using
31
- VERSION = '2.2.3'
72
+ # Alias for Loofah::HTML4
73
+ HTML = HTML4
32
74
 
33
75
  class << self
34
- # Shortcut for Loofah::HTML::Document.parse
35
- # This method accepts the same parameters as Nokogiri::HTML::Document.parse
36
- def document(*args, &block)
37
- Loofah::HTML::Document.parse(*args, &block)
76
+ # Shortcut for Loofah::HTML4::Document.parse(*args, &block)
77
+ #
78
+ # This method accepts the same parameters as Nokogiri::HTML4::Document.parse
79
+ def html4_document(*args, &block)
80
+ Loofah::HTML4::Document.parse(*args, &block)
38
81
  end
39
82
 
40
- # Shortcut for Loofah::HTML::DocumentFragment.parse
41
- # This method accepts the same parameters as Nokogiri::HTML::DocumentFragment.parse
42
- def fragment(*args, &block)
43
- Loofah::HTML::DocumentFragment.parse(*args, &block)
83
+ # Shortcut for Loofah::HTML4::DocumentFragment.parse(*args, &block)
84
+ #
85
+ # This method accepts the same parameters as Nokogiri::HTML4::DocumentFragment.parse
86
+ def html4_fragment(*args, &block)
87
+ Loofah::HTML4::DocumentFragment.parse(*args, &block)
44
88
  end
45
89
 
46
- # Shortcut for Loofah.fragment(string_or_io).scrub!(method)
47
- def scrub_fragment(string_or_io, method)
48
- Loofah.fragment(string_or_io).scrub!(method)
90
+ # Shortcut for Loofah::HTML4::Document.parse(string_or_io).scrub!(method)
91
+ def scrub_html4_document(string_or_io, method)
92
+ Loofah::HTML4::Document.parse(string_or_io).scrub!(method)
49
93
  end
50
94
 
51
- # Shortcut for Loofah.document(string_or_io).scrub!(method)
52
- def scrub_document(string_or_io, method)
53
- Loofah.document(string_or_io).scrub!(method)
95
+ # Shortcut for Loofah::HTML4::DocumentFragment.parse(string_or_io).scrub!(method)
96
+ def scrub_html4_fragment(string_or_io, method)
97
+ Loofah::HTML4::DocumentFragment.parse(string_or_io).scrub!(method)
54
98
  end
55
99
 
56
- # Shortcut for Loofah::XML::Document.parse
100
+ if Loofah.html5_support?
101
+ # Shortcut for Loofah::HTML5::Document.parse(*args, &block)
102
+ #
103
+ # This method accepts the same parameters as Nokogiri::HTML5::Document.parse
104
+ def html5_document(*args, &block)
105
+ Loofah::HTML5::Document.parse(*args, &block)
106
+ end
107
+
108
+ # Shortcut for Loofah::HTML5::DocumentFragment.parse(*args, &block)
109
+ #
110
+ # This method accepts the same parameters as Nokogiri::HTML5::DocumentFragment.parse
111
+ def html5_fragment(*args, &block)
112
+ Loofah::HTML5::DocumentFragment.parse(*args, &block)
113
+ end
114
+
115
+ # Shortcut for Loofah::HTML5::Document.parse(string_or_io).scrub!(method)
116
+ def scrub_html5_document(string_or_io, method)
117
+ Loofah::HTML5::Document.parse(string_or_io).scrub!(method)
118
+ end
119
+
120
+ # Shortcut for Loofah::HTML5::DocumentFragment.parse(string_or_io).scrub!(method)
121
+ def scrub_html5_fragment(string_or_io, method)
122
+ Loofah::HTML5::DocumentFragment.parse(string_or_io).scrub!(method)
123
+ end
124
+ else
125
+ def html5_document(*args, &block)
126
+ raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
127
+ end
128
+
129
+ def html5_fragment(*args, &block)
130
+ raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
131
+ end
132
+
133
+ def scrub_html5_document(string_or_io, method)
134
+ raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
135
+ end
136
+
137
+ def scrub_html5_fragment(string_or_io, method)
138
+ raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
139
+ end
140
+ end
141
+
142
+ alias_method :document, :html4_document
143
+ alias_method :fragment, :html4_fragment
144
+ alias_method :scrub_document, :scrub_html4_document
145
+ alias_method :scrub_fragment, :scrub_html4_fragment
146
+
147
+ # Shortcut for Loofah::XML::Document.parse(*args, &block)
148
+ #
57
149
  # This method accepts the same parameters as Nokogiri::XML::Document.parse
58
150
  def xml_document(*args, &block)
59
151
  Loofah::XML::Document.parse(*args, &block)
60
152
  end
61
153
 
62
- # Shortcut for Loofah::XML::DocumentFragment.parse
154
+ # Shortcut for Loofah::XML::DocumentFragment.parse(*args, &block)
155
+ #
63
156
  # This method accepts the same parameters as Nokogiri::XML::DocumentFragment.parse
64
157
  def xml_fragment(*args, &block)
65
158
  Loofah::XML::DocumentFragment.parse(*args, &block)
@@ -77,7 +170,7 @@ module Loofah
77
170
 
78
171
  # A helper to remove extraneous whitespace from text-ified HTML
79
172
  def remove_extraneous_whitespace(string)
80
- string.gsub(/\n\s*\n\s*\n/,"\n\n")
173
+ string.gsub(/\n\s*\n\s*\n/, "\n\n")
81
174
  end
82
175
  end
83
176
  end