RubyGems - loofah - Versions diffs - 2.2.3 → 2.21.1 - Mend

loofah 2.2.3 → 2.21.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46)

checksums.yaml +4 -4
data/CHANGELOG.md +269 -31
data/README.md +109 -124
data/lib/loofah/concerns.rb +207 -0
data/lib/loofah/elements.rb +85 -79
data/lib/loofah/helpers.rb +37 -20
data/lib/loofah/{html → html4}/document.rb +6 -7
data/lib/loofah/html4/document_fragment.rb +15 -0
data/lib/loofah/html5/document.rb +17 -0
data/lib/loofah/html5/document_fragment.rb +15 -0
data/lib/loofah/html5/libxml2_workarounds.rb +10 -8
data/lib/loofah/html5/safelist.rb +1055 -0
data/lib/loofah/html5/scrub.rb +153 -58
data/lib/loofah/metahelpers.rb +11 -6
data/lib/loofah/scrubber.rb +22 -15
data/lib/loofah/scrubbers.rb +66 -55
data/lib/loofah/version.rb +6 -0
data/lib/loofah/xml/document.rb +2 -0
data/lib/loofah/xml/document_fragment.rb +4 -7
data/lib/loofah.rb +131 -38
metadata +28 -216
data/.gemtest +0 -0
data/Gemfile +0 -22
data/Manifest.txt +0 -40
data/Rakefile +0 -79
data/benchmark/benchmark.rb +0 -149
data/benchmark/fragment.html +0 -96
data/benchmark/helper.rb +0 -73
data/benchmark/www.slashdot.com.html +0 -2560
data/lib/loofah/html/document_fragment.rb +0 -40
data/lib/loofah/html5/whitelist.rb +0 -186
data/lib/loofah/instance_methods.rb +0 -127
data/test/assets/msword.html +0 -63
data/test/assets/testdata_sanitizer_tests1.dat +0 -502
data/test/helper.rb +0 -18
data/test/html5/test_sanitizer.rb +0 -382
data/test/integration/test_ad_hoc.rb +0 -204
data/test/integration/test_helpers.rb +0 -43
data/test/integration/test_html.rb +0 -72
data/test/integration/test_scrubbers.rb +0 -400
data/test/integration/test_xml.rb +0 -55
data/test/unit/test_api.rb +0 -142
data/test/unit/test_encoding.rb +0 -20
data/test/unit/test_helpers.rb +0 -62
data/test/unit/test_scrubber.rb +0 -229
data/test/unit/test_scrubbers.rb +0 -14

data/lib/loofah/scrubbers.rb CHANGED Viewed

@@ -1,7 +1,9 @@
+# frozen_string_literal: true
 module Loofah
   #
   #  Loofah provides some built-in scrubbers for sanitizing with
-  #  HTML5lib's whitelist and for accomplishing some common
+  #  HTML5lib's safelist and for accomplishing some common
   #  transformation tasks.
   #
   #
@@ -10,7 +12,7 @@ module Loofah
   #  +:strip+ removes unknown/unsafe tags, but leaves behind the pristine contents:
   #
   #     unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
-  #     Loofah.fragment(unsafe_html).scrub!(:strip)
+  #     Loofah.html5_fragment(unsafe_html).scrub!(:strip)
   #     => "ohai! <div>div is safe</div> but foo is <b>not</b>"
   #
   #
@@ -19,7 +21,7 @@ module Loofah
   #  +:prune+ removes unknown/unsafe tags and their contents (including their subtrees):
   #
   #     unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
-  #     Loofah.fragment(unsafe_html).scrub!(:prune)
+  #     Loofah.html5_fragment(unsafe_html).scrub!(:prune)
   #     => "ohai! <div>div is safe</div> "
   #
   #
@@ -28,7 +30,7 @@ module Loofah
   #  +:escape+ performs HTML entity escaping on the unknown/unsafe tags:
   #
   #     unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
-  #     Loofah.fragment(unsafe_html).scrub!(:escape)
+  #     Loofah.html5_fragment(unsafe_html).scrub!(:escape)
   #     => "ohai! <div>div is safe</div> &lt;foo&gt;but foo is &lt;b&gt;not&lt;/b&gt;&lt;/foo&gt;"
   #
   #
@@ -40,7 +42,7 @@ module Loofah
   #  layer of paint on top of the HTML input to make it look nice.
   #
   #     messy_markup = "ohai! <div id='foo' class='bar' style='margin: 10px'>div with attributes</div>"
-  #     Loofah.fragment(messy_markup).scrub!(:whitewash)
+  #     Loofah.html5_fragment(messy_markup).scrub!(:whitewash)
   #     => "ohai! <div>div with attributes</div>"
   #
   #  One use case for this scrubber is to clean up HTML that was
@@ -55,7 +57,7 @@ module Loofah
   #  +:nofollow+ adds a rel="nofollow" attribute to all links
   #
   #     link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
-  #     Loofah.fragment(link_farmers_markup).scrub!(:nofollow)
+  #     Loofah.html5_fragment(link_farmers_markup).scrub!(:nofollow)
   #     => "ohai! <a href='http://www.myswarmysite.com/' rel="nofollow">I like your blog post</a>"
   #
   #
@@ -64,7 +66,7 @@ module Loofah
   #  +:noopener+ adds a rel="noopener" attribute to all links
   #
   #     link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
-  #     Loofah.fragment(link_farmers_markup).scrub!(:noopener)
+  #     Loofah.html5_fragment(link_farmers_markup).scrub!(:noopener)
   #     => "ohai! <a href='http://www.myswarmysite.com/' rel="noopener">I like your blog post</a>"
   #
   #
@@ -73,7 +75,7 @@ module Loofah
   #  +:unprintable+ removes unprintable Unicode characters.
   #
   #     markup = "<p>Some text with an unprintable character at the end\u2028</p>"
-  #     Loofah.fragment(markup).scrub!(:unprintable)
+  #     Loofah.html5_fragment(markup).scrub!(:unprintable)
   #     => "<p>Some text with an unprintable character at the end</p>"
   #
   #  You may not be able to see the unprintable character in the above example, but there is a
@@ -89,23 +91,20 @@ module Loofah
     #  +:strip+ removes unknown/unsafe tags, but leaves behind the pristine contents:
     #
     #     unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
-    #     Loofah.fragment(unsafe_html).scrub!(:strip)
+    #     Loofah.html5_fragment(unsafe_html).scrub!(:strip)
     #     => "ohai! <div>div is safe</div> but foo is <b>not</b>"
     #
     class Strip < Scrubber
-      def initialize
+      def initialize # rubocop:disable Lint/MissingSuper
         @direction = :bottom_up
       end
       def scrub(node)
         return CONTINUE if html5lib_sanitize(node) == CONTINUE
-        if node.children.length == 1 && node.children.first.cdata?
-          sanitized_text = Loofah.fragment(node.children.first.to_html).scrub!(:strip).to_html
-          node.before Nokogiri::XML::Text.new(sanitized_text, node.document)
-        else
-          node.before node.children
-        end
+        node.before(node.children)
         node.remove
+        STOP
       end
     end
@@ -115,18 +114,19 @@ module Loofah
     #  +:prune+ removes unknown/unsafe tags and their contents (including their subtrees):
     #
     #     unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
-    #     Loofah.fragment(unsafe_html).scrub!(:prune)
+    #     Loofah.html5_fragment(unsafe_html).scrub!(:prune)
     #     => "ohai! <div>div is safe</div> "
     #
     class Prune < Scrubber
-      def initialize
+      def initialize # rubocop:disable Lint/MissingSuper
         @direction = :top_down
       end
       def scrub(node)
         return CONTINUE if html5lib_sanitize(node) == CONTINUE
         node.remove
-        return STOP
+        STOP
       end
     end
@@ -136,19 +136,20 @@ module Loofah
     #  +:escape+ performs HTML entity escaping on the unknown/unsafe tags:
     #
     #     unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
-    #     Loofah.fragment(unsafe_html).scrub!(:escape)
+    #     Loofah.html5_fragment(unsafe_html).scrub!(:escape)
     #     => "ohai! <div>div is safe</div> &lt;foo&gt;but foo is &lt;b&gt;not&lt;/b&gt;&lt;/foo&gt;"
     #
     class Escape < Scrubber
-      def initialize
+      def initialize # rubocop:disable Lint/MissingSuper
         @direction = :top_down
       end
       def scrub(node)
         return CONTINUE if html5lib_sanitize(node) == CONTINUE
-        node.add_next_sibling Nokogiri::XML::Text.new(node.to_s, node.document)
+        node.add_next_sibling(Nokogiri::XML::Text.new(node.to_s, node.document))
         node.remove
-        return STOP
+        STOP
       end
     end
@@ -161,7 +162,7 @@ module Loofah
     #  layer of paint on top of the HTML input to make it look nice.
     #
     #     messy_markup = "ohai! <div id='foo' class='bar' style='margin: 10px'>div with attributes</div>"
-    #     Loofah.fragment(messy_markup).scrub!(:whitewash)
+    #     Loofah.html5_fragment(messy_markup).scrub!(:whitewash)
     #     => "ohai! <div>div with attributes</div>"
     #
     #  One use case for this scrubber is to clean up HTML that was
@@ -171,14 +172,14 @@ module Loofah
     #  Certainly not me.
     #
     class Whitewash < Scrubber
-      def initialize
+      def initialize # rubocop:disable Lint/MissingSuper
         @direction = :top_down
       end
       def scrub(node)
         case node.type
         when Nokogiri::XML::Node::ELEMENT_NODE
-          if HTML5::Scrub.allowed_element? node.name
+          if HTML5::Scrub.allowed_element?(node.name)
             node.attributes.each { |attr| node.remove_attribute(attr.first) }
             return CONTINUE if node.namespaces.empty?
           end
@@ -196,18 +197,19 @@ module Loofah
     #  +:nofollow+ adds a rel="nofollow" attribute to all links
     #
     #     link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
-    #     Loofah.fragment(link_farmers_markup).scrub!(:nofollow)
+    #     Loofah.html5_fragment(link_farmers_markup).scrub!(:nofollow)
     #     => "ohai! <a href='http://www.myswarmysite.com/' rel="nofollow">I like your blog post</a>"
     #
     class NoFollow < Scrubber
-      def initialize
+      def initialize # rubocop:disable Lint/MissingSuper
         @direction = :top_down
       end
       def scrub(node)
-        return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == 'a')
-        append_attribute(node, 'rel', 'nofollow')
-        return STOP
+        return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
+        append_attribute(node, "rel", "nofollow")
+        STOP
       end
     end
@@ -217,30 +219,37 @@ module Loofah
     #  +:noopener+ adds a rel="noopener" attribute to all links
     #
     #     link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
-    #     Loofah.fragment(link_farmers_markup).scrub!(:noopener)
+    #     Loofah.html5_fragment(link_farmers_markup).scrub!(:noopener)
     #     => "ohai! <a href='http://www.myswarmysite.com/' rel="noopener">I like your blog post</a>"
     #
     class NoOpener < Scrubber
-      def initialize
+      def initialize # rubocop:disable Lint/MissingSuper
         @direction = :top_down
       end
       def scrub(node)
-        return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == 'a')
-        append_attribute(node, 'rel', 'noopener')
-        return STOP
+        return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
+        append_attribute(node, "rel", "noopener")
+        STOP
       end
     end
     # This class probably isn't useful publicly, but is used for #to_text's current implemention
     class NewlineBlockElements < Scrubber # :nodoc:
-      def initialize
+      def initialize # rubocop:disable Lint/MissingSuper
         @direction = :bottom_up
       end
       def scrub(node)
-        return CONTINUE unless Loofah::Elements::BLOCK_LEVEL.include?(node.name)
-        node.add_next_sibling Nokogiri::XML::Text.new("\n#{node.content}\n", node.document)
+        return CONTINUE unless Loofah::Elements::LINEBREAKERS.include?(node.name)
+        replacement = if Loofah::Elements::INLINE_LINE_BREAK.include?(node.name)
+          "\n"
+        else
+          "\n#{node.content}\n"
+        end
+        node.add_next_sibling(Nokogiri::XML::Text.new(replacement, node.document))
         node.remove
       end
     end
@@ -251,7 +260,7 @@ module Loofah
     #  +:unprintable+ removes unprintable Unicode characters.
     #
     #     markup = "<p>Some text with an unprintable character at the end\u2028</p>"
-    #     Loofah.fragment(markup).scrub!(:unprintable)
+    #     Loofah.html5_fragment(markup).scrub!(:unprintable)
     #     => "<p>Some text with an unprintable character at the end</p>"
     #
     #  You may not be able to see the unprintable character in the above example, but there is a
@@ -261,13 +270,13 @@ module Loofah
     #     http://timelessrepo.com/json-isnt-a-javascript-subset
     #
     class Unprintable < Scrubber
-      def initialize
+      def initialize # rubocop:disable Lint/MissingSuper
         @direction = :top_down
       end
       def scrub(node)
         if node.type == Nokogiri::XML::Node::TEXT_NODE || node.type == Nokogiri::XML::Node::CDATA_SECTION_NODE
-          node.content = node.content.gsub(/\u2028|\u2029/, '')
+          node.content = node.content.gsub(/\u2028|\u2029/, "")
         end
         CONTINUE
       end
@@ -277,21 +286,23 @@ module Loofah
     #  A hash that maps a symbol (like +:prune+) to the appropriate Scrubber (Loofah::Scrubbers::Prune).
     #
     MAP = {
-      :escape    => Escape,
-      :prune     => Prune,
-      :whitewash => Whitewash,
-      :strip     => Strip,
-      :nofollow  => NoFollow,
-      :noopener => NoOpener,
-      :newline_block_elements => NewlineBlockElements,
-      :unprintable => Unprintable
+      escape: Escape,
+      prune: Prune,
+      whitewash: Whitewash,
+      strip: Strip,
+      nofollow: NoFollow,
+      noopener: NoOpener,
+      newline_block_elements: NewlineBlockElements,
+      unprintable: Unprintable,
     }
-    #
-    #  Returns an array of symbols representing the built-in scrubbers
-    #
-    def self.scrubber_symbols
-      MAP.keys
+    class << self
+      #
+      #  Returns an array of symbols representing the built-in scrubbers
+      #
+      def scrubber_symbols
+        MAP.keys
+      end
     end
   end
 end

data/lib/loofah/version.rb ADDED Viewed

@@ -0,0 +1,6 @@
+# frozen_string_literal: true
+module Loofah
+  # The version of Loofah you are using
+  VERSION = "2.21.1"
+end

data/lib/loofah/xml/document.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 module Loofah
   module XML # :nodoc:
     #

data/lib/loofah/xml/document_fragment.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 module Loofah
   module XML # :nodoc:
     #
@@ -7,15 +9,10 @@ module Loofah
     #
     class DocumentFragment < Nokogiri::XML::DocumentFragment
       class << self
-        #
-        #  Overridden Nokogiri::XML::DocumentFragment
-        #  constructor. Applications should use Loofah.fragment to
-        #  parse a fragment.
-        #
-        def parse tags
+        def parse(tags)
           doc = Loofah::XML::Document.new
           doc.encoding = tags.encoding.name if tags.respond_to?(:encoding)
-          self.new(doc, tags)
+          new(doc, tags)
         end
       end
     end

data/lib/loofah.rb CHANGED Viewed

@@ -1,65 +1,158 @@
-$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
+# frozen_string_literal: true
-require 'nokogiri'
+require "nokogiri"
-require 'loofah/metahelpers'
-require 'loofah/elements'
+module Loofah
+  class << self
+    def html5_support?
+      # Note that Loofah can only support HTML5 in Nokogiri >= 1.14.0 because it requires the
+      # subclassing fix from https://github.com/sparklemotion/nokogiri/pull/2534
+      unless @html5_support_set
+        @html5_support = (
+          Gem::Version.new(Nokogiri::VERSION) > Gem::Version.new("1.14.0") &&
+          Nokogiri.uses_gumbo?
+        )
+        @html5_support_set = true
+      end
+      @html5_support
+    end
+  end
+end
+require_relative "loofah/version"
+require_relative "loofah/metahelpers"
+require_relative "loofah/elements"
-require 'loofah/html5/whitelist'
-require 'loofah/html5/libxml2_workarounds'
-require 'loofah/html5/scrub'
+require_relative "loofah/html5/safelist"
+require_relative "loofah/html5/libxml2_workarounds"
+require_relative "loofah/html5/scrub"
-require 'loofah/scrubber'
-require 'loofah/scrubbers'
+require_relative "loofah/scrubber"
+require_relative "loofah/scrubbers"
-require 'loofah/instance_methods'
-require 'loofah/xml/document'
-require 'loofah/xml/document_fragment'
-require 'loofah/html/document'
-require 'loofah/html/document_fragment'
+require_relative "loofah/concerns"
+require_relative "loofah/xml/document"
+require_relative "loofah/xml/document_fragment"
+require_relative "loofah/html4/document"
+require_relative "loofah/html4/document_fragment"
+if Loofah.html5_support?
+  require_relative "loofah/html5/document"
+  require_relative "loofah/html5/document_fragment"
+end
 # == Strings and IO Objects as Input
 #
-# Loofah.document and Loofah.fragment accept any IO object in addition
-# to accepting a string. That IO object could be a file, or a socket,
-# or a StringIO, or anything that responds to +read+ and
-# +close+. Which makes it particularly easy to sanitize mass
-# quantities of docs.
+# The following methods accept any IO object in addition to accepting a string:
+#
+# - Loofah.html4_document
+# - Loofah.html4_fragment
+# - Loofah.scrub_html4_document
+# - Loofah.scrub_html4_fragment
+#
+# - Loofah.html5_document
+# - Loofah.html5_fragment
+# - Loofah.scrub_html5_document
+# - Loofah.scrub_html5_fragment
+#
+# - Loofah.xml_document
+# - Loofah.xml_fragment
+# - Loofah.scrub_xml_document
+# - Loofah.scrub_xml_fragment
+#
+# - Loofah.document
+# - Loofah.fragment
+# - Loofah.scrub_document
+# - Loofah.scrub_fragment
+#
+# That IO object could be a file, or a socket, or a StringIO, or anything that responds to +read+
+# and +close+.
 #
 module Loofah
-  # The version of Loofah you are using
-  VERSION = '2.2.3'
+  # Alias for Loofah::HTML4
+  HTML = HTML4
   class << self
-    # Shortcut for Loofah::HTML::Document.parse
-    # This method accepts the same parameters as Nokogiri::HTML::Document.parse
-    def document(*args, &block)
-      Loofah::HTML::Document.parse(*args, &block)
+    # Shortcut for Loofah::HTML4::Document.parse(*args, &block)
+    #
+    # This method accepts the same parameters as Nokogiri::HTML4::Document.parse
+    def html4_document(*args, &block)
+      Loofah::HTML4::Document.parse(*args, &block)
     end
-    # Shortcut for Loofah::HTML::DocumentFragment.parse
-    # This method accepts the same parameters as Nokogiri::HTML::DocumentFragment.parse
-    def fragment(*args, &block)
-      Loofah::HTML::DocumentFragment.parse(*args, &block)
+    # Shortcut for Loofah::HTML4::DocumentFragment.parse(*args, &block)
+    #
+    # This method accepts the same parameters as Nokogiri::HTML4::DocumentFragment.parse
+    def html4_fragment(*args, &block)
+      Loofah::HTML4::DocumentFragment.parse(*args, &block)
     end
-    # Shortcut for Loofah.fragment(string_or_io).scrub!(method)
-    def scrub_fragment(string_or_io, method)
-      Loofah.fragment(string_or_io).scrub!(method)
+    # Shortcut for Loofah::HTML4::Document.parse(string_or_io).scrub!(method)
+    def scrub_html4_document(string_or_io, method)
+      Loofah::HTML4::Document.parse(string_or_io).scrub!(method)
     end
-    # Shortcut for Loofah.document(string_or_io).scrub!(method)
-    def scrub_document(string_or_io, method)
-      Loofah.document(string_or_io).scrub!(method)
+    # Shortcut for Loofah::HTML4::DocumentFragment.parse(string_or_io).scrub!(method)
+    def scrub_html4_fragment(string_or_io, method)
+      Loofah::HTML4::DocumentFragment.parse(string_or_io).scrub!(method)
     end
-    # Shortcut for Loofah::XML::Document.parse
+    if Loofah.html5_support?
+      # Shortcut for Loofah::HTML5::Document.parse(*args, &block)
+      #
+      # This method accepts the same parameters as Nokogiri::HTML5::Document.parse
+      def html5_document(*args, &block)
+        Loofah::HTML5::Document.parse(*args, &block)
+      end
+      # Shortcut for Loofah::HTML5::DocumentFragment.parse(*args, &block)
+      #
+      # This method accepts the same parameters as Nokogiri::HTML5::DocumentFragment.parse
+      def html5_fragment(*args, &block)
+        Loofah::HTML5::DocumentFragment.parse(*args, &block)
+      end
+      # Shortcut for Loofah::HTML5::Document.parse(string_or_io).scrub!(method)
+      def scrub_html5_document(string_or_io, method)
+        Loofah::HTML5::Document.parse(string_or_io).scrub!(method)
+      end
+      # Shortcut for Loofah::HTML5::DocumentFragment.parse(string_or_io).scrub!(method)
+      def scrub_html5_fragment(string_or_io, method)
+        Loofah::HTML5::DocumentFragment.parse(string_or_io).scrub!(method)
+      end
+    else
+      def html5_document(*args, &block)
+        raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
+      end
+      def html5_fragment(*args, &block)
+        raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
+      end
+      def scrub_html5_document(string_or_io, method)
+        raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
+      end
+      def scrub_html5_fragment(string_or_io, method)
+        raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
+      end
+    end
+    alias_method :document, :html4_document
+    alias_method :fragment, :html4_fragment
+    alias_method :scrub_document, :scrub_html4_document
+    alias_method :scrub_fragment, :scrub_html4_fragment
+    # Shortcut for Loofah::XML::Document.parse(*args, &block)
+    #
     # This method accepts the same parameters as Nokogiri::XML::Document.parse
     def xml_document(*args, &block)
       Loofah::XML::Document.parse(*args, &block)
     end
-    # Shortcut for Loofah::XML::DocumentFragment.parse
+    # Shortcut for Loofah::XML::DocumentFragment.parse(*args, &block)
+    #
     # This method accepts the same parameters as Nokogiri::XML::DocumentFragment.parse
     def xml_fragment(*args, &block)
       Loofah::XML::DocumentFragment.parse(*args, &block)
@@ -77,7 +170,7 @@ module Loofah
     # A helper to remove extraneous whitespace from text-ified HTML
     def remove_extraneous_whitespace(string)
-      string.gsub(/\n\s*\n\s*\n/,"\n\n")
+      string.gsub(/\n\s*\n\s*\n/, "\n\n")
     end
   end
 end