loofah 2.2.3 → 2.21.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +269 -31
  3. data/README.md +109 -124
  4. data/lib/loofah/concerns.rb +207 -0
  5. data/lib/loofah/elements.rb +85 -79
  6. data/lib/loofah/helpers.rb +37 -20
  7. data/lib/loofah/{html → html4}/document.rb +6 -7
  8. data/lib/loofah/html4/document_fragment.rb +15 -0
  9. data/lib/loofah/html5/document.rb +17 -0
  10. data/lib/loofah/html5/document_fragment.rb +15 -0
  11. data/lib/loofah/html5/libxml2_workarounds.rb +10 -8
  12. data/lib/loofah/html5/safelist.rb +1055 -0
  13. data/lib/loofah/html5/scrub.rb +153 -58
  14. data/lib/loofah/metahelpers.rb +11 -6
  15. data/lib/loofah/scrubber.rb +22 -15
  16. data/lib/loofah/scrubbers.rb +66 -55
  17. data/lib/loofah/version.rb +6 -0
  18. data/lib/loofah/xml/document.rb +2 -0
  19. data/lib/loofah/xml/document_fragment.rb +4 -7
  20. data/lib/loofah.rb +131 -38
  21. metadata +28 -216
  22. data/.gemtest +0 -0
  23. data/Gemfile +0 -22
  24. data/Manifest.txt +0 -40
  25. data/Rakefile +0 -79
  26. data/benchmark/benchmark.rb +0 -149
  27. data/benchmark/fragment.html +0 -96
  28. data/benchmark/helper.rb +0 -73
  29. data/benchmark/www.slashdot.com.html +0 -2560
  30. data/lib/loofah/html/document_fragment.rb +0 -40
  31. data/lib/loofah/html5/whitelist.rb +0 -186
  32. data/lib/loofah/instance_methods.rb +0 -127
  33. data/test/assets/msword.html +0 -63
  34. data/test/assets/testdata_sanitizer_tests1.dat +0 -502
  35. data/test/helper.rb +0 -18
  36. data/test/html5/test_sanitizer.rb +0 -382
  37. data/test/integration/test_ad_hoc.rb +0 -204
  38. data/test/integration/test_helpers.rb +0 -43
  39. data/test/integration/test_html.rb +0 -72
  40. data/test/integration/test_scrubbers.rb +0 -400
  41. data/test/integration/test_xml.rb +0 -55
  42. data/test/unit/test_api.rb +0 -142
  43. data/test/unit/test_encoding.rb +0 -20
  44. data/test/unit/test_helpers.rb +0 -62
  45. data/test/unit/test_scrubber.rb +0 -229
  46. data/test/unit/test_scrubbers.rb +0 -14
@@ -1,7 +1,9 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Loofah
2
4
  #
3
5
  # Loofah provides some built-in scrubbers for sanitizing with
4
- # HTML5lib's whitelist and for accomplishing some common
6
+ # HTML5lib's safelist and for accomplishing some common
5
7
  # transformation tasks.
6
8
  #
7
9
  #
@@ -10,7 +12,7 @@ module Loofah
10
12
  # +:strip+ removes unknown/unsafe tags, but leaves behind the pristine contents:
11
13
  #
12
14
  # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
13
- # Loofah.fragment(unsafe_html).scrub!(:strip)
15
+ # Loofah.html5_fragment(unsafe_html).scrub!(:strip)
14
16
  # => "ohai! <div>div is safe</div> but foo is <b>not</b>"
15
17
  #
16
18
  #
@@ -19,7 +21,7 @@ module Loofah
19
21
  # +:prune+ removes unknown/unsafe tags and their contents (including their subtrees):
20
22
  #
21
23
  # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
22
- # Loofah.fragment(unsafe_html).scrub!(:prune)
24
+ # Loofah.html5_fragment(unsafe_html).scrub!(:prune)
23
25
  # => "ohai! <div>div is safe</div> "
24
26
  #
25
27
  #
@@ -28,7 +30,7 @@ module Loofah
28
30
  # +:escape+ performs HTML entity escaping on the unknown/unsafe tags:
29
31
  #
30
32
  # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
31
- # Loofah.fragment(unsafe_html).scrub!(:escape)
33
+ # Loofah.html5_fragment(unsafe_html).scrub!(:escape)
32
34
  # => "ohai! <div>div is safe</div> &lt;foo&gt;but foo is &lt;b&gt;not&lt;/b&gt;&lt;/foo&gt;"
33
35
  #
34
36
  #
@@ -40,7 +42,7 @@ module Loofah
40
42
  # layer of paint on top of the HTML input to make it look nice.
41
43
  #
42
44
  # messy_markup = "ohai! <div id='foo' class='bar' style='margin: 10px'>div with attributes</div>"
43
- # Loofah.fragment(messy_markup).scrub!(:whitewash)
45
+ # Loofah.html5_fragment(messy_markup).scrub!(:whitewash)
44
46
  # => "ohai! <div>div with attributes</div>"
45
47
  #
46
48
  # One use case for this scrubber is to clean up HTML that was
@@ -55,7 +57,7 @@ module Loofah
55
57
  # +:nofollow+ adds a rel="nofollow" attribute to all links
56
58
  #
57
59
  # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
58
- # Loofah.fragment(link_farmers_markup).scrub!(:nofollow)
60
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:nofollow)
59
61
  # => "ohai! <a href='http://www.myswarmysite.com/' rel="nofollow">I like your blog post</a>"
60
62
  #
61
63
  #
@@ -64,7 +66,7 @@ module Loofah
64
66
  # +:noopener+ adds a rel="noopener" attribute to all links
65
67
  #
66
68
  # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
67
- # Loofah.fragment(link_farmers_markup).scrub!(:noopener)
69
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:noopener)
68
70
  # => "ohai! <a href='http://www.myswarmysite.com/' rel="noopener">I like your blog post</a>"
69
71
  #
70
72
  #
@@ -73,7 +75,7 @@ module Loofah
73
75
  # +:unprintable+ removes unprintable Unicode characters.
74
76
  #
75
77
  # markup = "<p>Some text with an unprintable character at the end\u2028</p>"
76
- # Loofah.fragment(markup).scrub!(:unprintable)
78
+ # Loofah.html5_fragment(markup).scrub!(:unprintable)
77
79
  # => "<p>Some text with an unprintable character at the end</p>"
78
80
  #
79
81
  # You may not be able to see the unprintable character in the above example, but there is a
@@ -89,23 +91,20 @@ module Loofah
89
91
  # +:strip+ removes unknown/unsafe tags, but leaves behind the pristine contents:
90
92
  #
91
93
  # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
92
- # Loofah.fragment(unsafe_html).scrub!(:strip)
94
+ # Loofah.html5_fragment(unsafe_html).scrub!(:strip)
93
95
  # => "ohai! <div>div is safe</div> but foo is <b>not</b>"
94
96
  #
95
97
  class Strip < Scrubber
96
- def initialize
98
+ def initialize # rubocop:disable Lint/MissingSuper
97
99
  @direction = :bottom_up
98
100
  end
99
101
 
100
102
  def scrub(node)
101
103
  return CONTINUE if html5lib_sanitize(node) == CONTINUE
102
- if node.children.length == 1 && node.children.first.cdata?
103
- sanitized_text = Loofah.fragment(node.children.first.to_html).scrub!(:strip).to_html
104
- node.before Nokogiri::XML::Text.new(sanitized_text, node.document)
105
- else
106
- node.before node.children
107
- end
104
+
105
+ node.before(node.children)
108
106
  node.remove
107
+ STOP
109
108
  end
110
109
  end
111
110
 
@@ -115,18 +114,19 @@ module Loofah
115
114
  # +:prune+ removes unknown/unsafe tags and their contents (including their subtrees):
116
115
  #
117
116
  # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
118
- # Loofah.fragment(unsafe_html).scrub!(:prune)
117
+ # Loofah.html5_fragment(unsafe_html).scrub!(:prune)
119
118
  # => "ohai! <div>div is safe</div> "
120
119
  #
121
120
  class Prune < Scrubber
122
- def initialize
121
+ def initialize # rubocop:disable Lint/MissingSuper
123
122
  @direction = :top_down
124
123
  end
125
124
 
126
125
  def scrub(node)
127
126
  return CONTINUE if html5lib_sanitize(node) == CONTINUE
127
+
128
128
  node.remove
129
- return STOP
129
+ STOP
130
130
  end
131
131
  end
132
132
 
@@ -136,19 +136,20 @@ module Loofah
136
136
  # +:escape+ performs HTML entity escaping on the unknown/unsafe tags:
137
137
  #
138
138
  # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
139
- # Loofah.fragment(unsafe_html).scrub!(:escape)
139
+ # Loofah.html5_fragment(unsafe_html).scrub!(:escape)
140
140
  # => "ohai! <div>div is safe</div> &lt;foo&gt;but foo is &lt;b&gt;not&lt;/b&gt;&lt;/foo&gt;"
141
141
  #
142
142
  class Escape < Scrubber
143
- def initialize
143
+ def initialize # rubocop:disable Lint/MissingSuper
144
144
  @direction = :top_down
145
145
  end
146
146
 
147
147
  def scrub(node)
148
148
  return CONTINUE if html5lib_sanitize(node) == CONTINUE
149
- node.add_next_sibling Nokogiri::XML::Text.new(node.to_s, node.document)
149
+
150
+ node.add_next_sibling(Nokogiri::XML::Text.new(node.to_s, node.document))
150
151
  node.remove
151
- return STOP
152
+ STOP
152
153
  end
153
154
  end
154
155
 
@@ -161,7 +162,7 @@ module Loofah
161
162
  # layer of paint on top of the HTML input to make it look nice.
162
163
  #
163
164
  # messy_markup = "ohai! <div id='foo' class='bar' style='margin: 10px'>div with attributes</div>"
164
- # Loofah.fragment(messy_markup).scrub!(:whitewash)
165
+ # Loofah.html5_fragment(messy_markup).scrub!(:whitewash)
165
166
  # => "ohai! <div>div with attributes</div>"
166
167
  #
167
168
  # One use case for this scrubber is to clean up HTML that was
@@ -171,14 +172,14 @@ module Loofah
171
172
  # Certainly not me.
172
173
  #
173
174
  class Whitewash < Scrubber
174
- def initialize
175
+ def initialize # rubocop:disable Lint/MissingSuper
175
176
  @direction = :top_down
176
177
  end
177
178
 
178
179
  def scrub(node)
179
180
  case node.type
180
181
  when Nokogiri::XML::Node::ELEMENT_NODE
181
- if HTML5::Scrub.allowed_element? node.name
182
+ if HTML5::Scrub.allowed_element?(node.name)
182
183
  node.attributes.each { |attr| node.remove_attribute(attr.first) }
183
184
  return CONTINUE if node.namespaces.empty?
184
185
  end
@@ -196,18 +197,19 @@ module Loofah
196
197
  # +:nofollow+ adds a rel="nofollow" attribute to all links
197
198
  #
198
199
  # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
199
- # Loofah.fragment(link_farmers_markup).scrub!(:nofollow)
200
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:nofollow)
200
201
  # => "ohai! <a href='http://www.myswarmysite.com/' rel="nofollow">I like your blog post</a>"
201
202
  #
202
203
  class NoFollow < Scrubber
203
- def initialize
204
+ def initialize # rubocop:disable Lint/MissingSuper
204
205
  @direction = :top_down
205
206
  end
206
207
 
207
208
  def scrub(node)
208
- return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == 'a')
209
- append_attribute(node, 'rel', 'nofollow')
210
- return STOP
209
+ return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
210
+
211
+ append_attribute(node, "rel", "nofollow")
212
+ STOP
211
213
  end
212
214
  end
213
215
 
@@ -217,30 +219,37 @@ module Loofah
217
219
  # +:noopener+ adds a rel="noopener" attribute to all links
218
220
  #
219
221
  # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
220
- # Loofah.fragment(link_farmers_markup).scrub!(:noopener)
222
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:noopener)
221
223
  # => "ohai! <a href='http://www.myswarmysite.com/' rel="noopener">I like your blog post</a>"
222
224
  #
223
225
  class NoOpener < Scrubber
224
- def initialize
226
+ def initialize # rubocop:disable Lint/MissingSuper
225
227
  @direction = :top_down
226
228
  end
227
229
 
228
230
  def scrub(node)
229
- return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == 'a')
230
- append_attribute(node, 'rel', 'noopener')
231
- return STOP
231
+ return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
232
+
233
+ append_attribute(node, "rel", "noopener")
234
+ STOP
232
235
  end
233
236
  end
234
237
 
235
238
  # This class probably isn't useful publicly, but is used for #to_text's current implementation
236
239
  class NewlineBlockElements < Scrubber # :nodoc:
237
- def initialize
240
+ def initialize # rubocop:disable Lint/MissingSuper
238
241
  @direction = :bottom_up
239
242
  end
240
243
 
241
244
  def scrub(node)
242
- return CONTINUE unless Loofah::Elements::BLOCK_LEVEL.include?(node.name)
243
- node.add_next_sibling Nokogiri::XML::Text.new("\n#{node.content}\n", node.document)
245
+ return CONTINUE unless Loofah::Elements::LINEBREAKERS.include?(node.name)
246
+
247
+ replacement = if Loofah::Elements::INLINE_LINE_BREAK.include?(node.name)
248
+ "\n"
249
+ else
250
+ "\n#{node.content}\n"
251
+ end
252
+ node.add_next_sibling(Nokogiri::XML::Text.new(replacement, node.document))
244
253
  node.remove
245
254
  end
246
255
  end
@@ -251,7 +260,7 @@ module Loofah
251
260
  # +:unprintable+ removes unprintable Unicode characters.
252
261
  #
253
262
  # markup = "<p>Some text with an unprintable character at the end\u2028</p>"
254
- # Loofah.fragment(markup).scrub!(:unprintable)
263
+ # Loofah.html5_fragment(markup).scrub!(:unprintable)
255
264
  # => "<p>Some text with an unprintable character at the end</p>"
256
265
  #
257
266
  # You may not be able to see the unprintable character in the above example, but there is a
@@ -261,13 +270,13 @@ module Loofah
261
270
  # http://timelessrepo.com/json-isnt-a-javascript-subset
262
271
  #
263
272
  class Unprintable < Scrubber
264
- def initialize
273
+ def initialize # rubocop:disable Lint/MissingSuper
265
274
  @direction = :top_down
266
275
  end
267
276
 
268
277
  def scrub(node)
269
278
  if node.type == Nokogiri::XML::Node::TEXT_NODE || node.type == Nokogiri::XML::Node::CDATA_SECTION_NODE
270
- node.content = node.content.gsub(/\u2028|\u2029/, '')
279
+ node.content = node.content.gsub(/\u2028|\u2029/, "")
271
280
  end
272
281
  CONTINUE
273
282
  end
@@ -277,21 +286,23 @@ module Loofah
277
286
  # A hash that maps a symbol (like +:prune+) to the appropriate Scrubber (Loofah::Scrubbers::Prune).
278
287
  #
279
288
  MAP = {
280
- :escape => Escape,
281
- :prune => Prune,
282
- :whitewash => Whitewash,
283
- :strip => Strip,
284
- :nofollow => NoFollow,
285
- :noopener => NoOpener,
286
- :newline_block_elements => NewlineBlockElements,
287
- :unprintable => Unprintable
289
+ escape: Escape,
290
+ prune: Prune,
291
+ whitewash: Whitewash,
292
+ strip: Strip,
293
+ nofollow: NoFollow,
294
+ noopener: NoOpener,
295
+ newline_block_elements: NewlineBlockElements,
296
+ unprintable: Unprintable,
288
297
  }
289
298
 
290
- #
291
- # Returns an array of symbols representing the built-in scrubbers
292
- #
293
- def self.scrubber_symbols
294
- MAP.keys
299
+ class << self
300
+ #
301
+ # Returns an array of symbols representing the built-in scrubbers
302
+ #
303
+ def scrubber_symbols
304
+ MAP.keys
305
+ end
295
306
  end
296
307
  end
297
308
  end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Loofah
4
+ # The version of Loofah you are using
5
+ VERSION = "2.21.1"
6
+ end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Loofah
2
4
  module XML # :nodoc:
3
5
  #
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Loofah
2
4
  module XML # :nodoc:
3
5
  #
@@ -7,15 +9,10 @@ module Loofah
7
9
  #
8
10
  class DocumentFragment < Nokogiri::XML::DocumentFragment
9
11
  class << self
10
- #
11
- # Overridden Nokogiri::XML::DocumentFragment
12
- # constructor. Applications should use Loofah.fragment to
13
- # parse a fragment.
14
- #
15
- def parse tags
12
+ def parse(tags)
16
13
  doc = Loofah::XML::Document.new
17
14
  doc.encoding = tags.encoding.name if tags.respond_to?(:encoding)
18
- self.new(doc, tags)
15
+ new(doc, tags)
19
16
  end
20
17
  end
21
18
  end
data/lib/loofah.rb CHANGED
@@ -1,65 +1,158 @@
1
- $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
1
+ # frozen_string_literal: true
2
2
 
3
- require 'nokogiri'
3
+ require "nokogiri"
4
4
 
5
- require 'loofah/metahelpers'
6
- require 'loofah/elements'
5
+ module Loofah
6
+ class << self
7
+ def html5_support?
8
+ # Note that Loofah can only support HTML5 in Nokogiri >= 1.14.0 because it requires the
9
+ # subclassing fix from https://github.com/sparklemotion/nokogiri/pull/2534
10
+ unless @html5_support_set
11
+ @html5_support = (
12
+ Gem::Version.new(Nokogiri::VERSION) > Gem::Version.new("1.14.0") &&
13
+ Nokogiri.uses_gumbo?
14
+ )
15
+ @html5_support_set = true
16
+ end
17
+ @html5_support
18
+ end
19
+ end
20
+ end
21
+
22
+ require_relative "loofah/version"
23
+ require_relative "loofah/metahelpers"
24
+ require_relative "loofah/elements"
7
25
 
8
- require 'loofah/html5/whitelist'
9
- require 'loofah/html5/libxml2_workarounds'
10
- require 'loofah/html5/scrub'
26
+ require_relative "loofah/html5/safelist"
27
+ require_relative "loofah/html5/libxml2_workarounds"
28
+ require_relative "loofah/html5/scrub"
11
29
 
12
- require 'loofah/scrubber'
13
- require 'loofah/scrubbers'
30
+ require_relative "loofah/scrubber"
31
+ require_relative "loofah/scrubbers"
14
32
 
15
- require 'loofah/instance_methods'
16
- require 'loofah/xml/document'
17
- require 'loofah/xml/document_fragment'
18
- require 'loofah/html/document'
19
- require 'loofah/html/document_fragment'
33
+ require_relative "loofah/concerns"
34
+ require_relative "loofah/xml/document"
35
+ require_relative "loofah/xml/document_fragment"
36
+ require_relative "loofah/html4/document"
37
+ require_relative "loofah/html4/document_fragment"
38
+
39
+ if Loofah.html5_support?
40
+ require_relative "loofah/html5/document"
41
+ require_relative "loofah/html5/document_fragment"
42
+ end
20
43
 
21
44
  # == Strings and IO Objects as Input
22
45
  #
23
- # Loofah.document and Loofah.fragment accept any IO object in addition
24
- # to accepting a string. That IO object could be a file, or a socket,
25
- # or a StringIO, or anything that responds to +read+ and
26
- # +close+. Which makes it particularly easy to sanitize mass
27
- # quantities of docs.
46
+ # The following methods accept any IO object in addition to accepting a string:
47
+ #
48
+ # - Loofah.html4_document
49
+ # - Loofah.html4_fragment
50
+ # - Loofah.scrub_html4_document
51
+ # - Loofah.scrub_html4_fragment
52
+ #
53
+ # - Loofah.html5_document
54
+ # - Loofah.html5_fragment
55
+ # - Loofah.scrub_html5_document
56
+ # - Loofah.scrub_html5_fragment
57
+ #
58
+ # - Loofah.xml_document
59
+ # - Loofah.xml_fragment
60
+ # - Loofah.scrub_xml_document
61
+ # - Loofah.scrub_xml_fragment
62
+ #
63
+ # - Loofah.document
64
+ # - Loofah.fragment
65
+ # - Loofah.scrub_document
66
+ # - Loofah.scrub_fragment
67
+ #
68
+ # That IO object could be a file, or a socket, or a StringIO, or anything that responds to +read+
69
+ # and +close+.
28
70
  #
29
71
  module Loofah
30
- # The version of Loofah you are using
31
- VERSION = '2.2.3'
72
+ # Alias for Loofah::HTML4
73
+ HTML = HTML4
32
74
 
33
75
  class << self
34
- # Shortcut for Loofah::HTML::Document.parse
35
- # This method accepts the same parameters as Nokogiri::HTML::Document.parse
36
- def document(*args, &block)
37
- Loofah::HTML::Document.parse(*args, &block)
76
+ # Shortcut for Loofah::HTML4::Document.parse(*args, &block)
77
+ #
78
+ # This method accepts the same parameters as Nokogiri::HTML4::Document.parse
79
+ def html4_document(*args, &block)
80
+ Loofah::HTML4::Document.parse(*args, &block)
38
81
  end
39
82
 
40
- # Shortcut for Loofah::HTML::DocumentFragment.parse
41
- # This method accepts the same parameters as Nokogiri::HTML::DocumentFragment.parse
42
- def fragment(*args, &block)
43
- Loofah::HTML::DocumentFragment.parse(*args, &block)
83
+ # Shortcut for Loofah::HTML4::DocumentFragment.parse(*args, &block)
84
+ #
85
+ # This method accepts the same parameters as Nokogiri::HTML4::DocumentFragment.parse
86
+ def html4_fragment(*args, &block)
87
+ Loofah::HTML4::DocumentFragment.parse(*args, &block)
44
88
  end
45
89
 
46
- # Shortcut for Loofah.fragment(string_or_io).scrub!(method)
47
- def scrub_fragment(string_or_io, method)
48
- Loofah.fragment(string_or_io).scrub!(method)
90
+ # Shortcut for Loofah::HTML4::Document.parse(string_or_io).scrub!(method)
91
+ def scrub_html4_document(string_or_io, method)
92
+ Loofah::HTML4::Document.parse(string_or_io).scrub!(method)
49
93
  end
50
94
 
51
- # Shortcut for Loofah.document(string_or_io).scrub!(method)
52
- def scrub_document(string_or_io, method)
53
- Loofah.document(string_or_io).scrub!(method)
95
+ # Shortcut for Loofah::HTML4::DocumentFragment.parse(string_or_io).scrub!(method)
96
+ def scrub_html4_fragment(string_or_io, method)
97
+ Loofah::HTML4::DocumentFragment.parse(string_or_io).scrub!(method)
54
98
  end
55
99
 
56
- # Shortcut for Loofah::XML::Document.parse
100
+ if Loofah.html5_support?
101
+ # Shortcut for Loofah::HTML5::Document.parse(*args, &block)
102
+ #
103
+ # This method accepts the same parameters as Nokogiri::HTML5::Document.parse
104
+ def html5_document(*args, &block)
105
+ Loofah::HTML5::Document.parse(*args, &block)
106
+ end
107
+
108
+ # Shortcut for Loofah::HTML5::DocumentFragment.parse(*args, &block)
109
+ #
110
+ # This method accepts the same parameters as Nokogiri::HTML5::DocumentFragment.parse
111
+ def html5_fragment(*args, &block)
112
+ Loofah::HTML5::DocumentFragment.parse(*args, &block)
113
+ end
114
+
115
+ # Shortcut for Loofah::HTML5::Document.parse(string_or_io).scrub!(method)
116
+ def scrub_html5_document(string_or_io, method)
117
+ Loofah::HTML5::Document.parse(string_or_io).scrub!(method)
118
+ end
119
+
120
+ # Shortcut for Loofah::HTML5::DocumentFragment.parse(string_or_io).scrub!(method)
121
+ def scrub_html5_fragment(string_or_io, method)
122
+ Loofah::HTML5::DocumentFragment.parse(string_or_io).scrub!(method)
123
+ end
124
+ else
125
+ def html5_document(*args, &block)
126
+ raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
127
+ end
128
+
129
+ def html5_fragment(*args, &block)
130
+ raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
131
+ end
132
+
133
+ def scrub_html5_document(string_or_io, method)
134
+ raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
135
+ end
136
+
137
+ def scrub_html5_fragment(string_or_io, method)
138
+ raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
139
+ end
140
+ end
141
+
142
+ alias_method :document, :html4_document
143
+ alias_method :fragment, :html4_fragment
144
+ alias_method :scrub_document, :scrub_html4_document
145
+ alias_method :scrub_fragment, :scrub_html4_fragment
146
+
147
+ # Shortcut for Loofah::XML::Document.parse(*args, &block)
148
+ #
57
149
  # This method accepts the same parameters as Nokogiri::XML::Document.parse
58
150
  def xml_document(*args, &block)
59
151
  Loofah::XML::Document.parse(*args, &block)
60
152
  end
61
153
 
62
- # Shortcut for Loofah::XML::DocumentFragment.parse
154
+ # Shortcut for Loofah::XML::DocumentFragment.parse(*args, &block)
155
+ #
63
156
  # This method accepts the same parameters as Nokogiri::XML::DocumentFragment.parse
64
157
  def xml_fragment(*args, &block)
65
158
  Loofah::XML::DocumentFragment.parse(*args, &block)
@@ -77,7 +170,7 @@ module Loofah
77
170
 
78
171
  # A helper to remove extraneous whitespace from text-ified HTML
79
172
  def remove_extraneous_whitespace(string)
80
- string.gsub(/\n\s*\n\s*\n/,"\n\n")
173
+ string.gsub(/\n\s*\n\s*\n/, "\n\n")
81
174
  end
82
175
  end
83
176
  end