RubyGems - sanitize - Versions diffs - 5.0.0 → 5.1.0 - Mend

sanitize 5.0.0 → 5.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of sanitize might be problematic. More details are available via the link on the original page.

Files changed (12)

checksums.yaml +4 -4
data/HISTORY.md +19 -0
data/README.md +11 -0
data/lib/sanitize.rb +21 -7
data/lib/sanitize/config/default.rb +4 -0
data/lib/sanitize/version.rb +1 -1
data/test/common.rb +0 -31
data/test/test_malicious_html.rb +22 -7
data/test/test_sanitize.rb +98 -13
data/test/test_sanitize_css.rb +39 -12
metadata +3 -5
data/test/test_unicode.rb +0 -95

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: c88243234986bc11c6e1da92e05f9ea153d6016f5e5c3c8e8ad6602b7225e07f
-  data.tar.gz: abf83048949361fbcaf7fdb1d03066c9787303ceee39c42d69a245d300bc4453
+  metadata.gz: 8cf7bac25cea64ed464d106bdc57019388598ca9f1a4e7d8eddf3a98bab12267
+  data.tar.gz: e8b1f402b0d67a825b0ad4aad83829816fd9c78cd8445879636cba0a282e8ee5
 SHA512:
-  metadata.gz: f72364a3ec7939a07d30f681c58f4bd4bafa804dff0ecef69a8fb31b16d2e77439c4b1e18c756e370b756067e1bacd7bd8ea8943d447ad144396068da57798a2
-  data.tar.gz: 1ac997e7ae3f0ffc65d002e439b63bf755acda220bb295a7d648d474333e9d9747259f4cca2af715da8df9f425c17eb8a8148ba5cf12c91cbfee71a74da15eda
+  metadata.gz: 956edaca6569a5933223da0aa7dcac4880b5164aa59e37256ac896c9fefb271da71425defe7e09e241b1333b441f5a2629893abed6d5a2a47d0726bf03597614
+  data.tar.gz: e45a018b904bcf8cb996f8ed08427e80b8ce058c4fe414782460c5496e88bb6c2a4055304118057621a630e514b4f96bac11bdc686181a6f0097dc7bf912ab04

data/HISTORY.md CHANGED Viewed

@@ -1,5 +1,24 @@
 # Sanitize History
+## 5.1.0 (2019-09-07)
+### Features
+* Added a `:parser_options` config hash, which makes it possible to pass custom
+  parsing options to Nokogumbo. [@austin-wang - #194][194]
+### Bug Fixes
+* Non-characters and non-whitespace control characters are now stripped from
+  HTML input before parsing to comply with the HTML Standard's [preprocessing
+  guidelines][html-preprocessing]. Prior to this Sanitize had adhered to [older
+  W3C guidelines][unicode-xml] that have since been withdrawn. [#179][179]
+[179]:https://github.com/rgrove/sanitize/issues/179
+[194]:https://github.com/rgrove/sanitize/pull/194
+[html-preprocessing]:https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
+[unicode-xml]:https://www.w3.org/TR/unicode-xml/
 ## 5.0.0 (2018-10-14)
 For most users, upgrading from 4.x shouldn't require any changes. However, the

data/README.md CHANGED Viewed

@@ -417,6 +417,17 @@ elements not in this array will be removed.
 ]
 ```
+#### :parser_options (Hash)
+[Parsing options](https://github.com/rubys/nokogumbo/tree/v2.0.1#parsing-options) supplied to `nokogumbo`.
+```ruby
+:parser_options => {
+  max_errors: -1,
+  max_tree_depth: -1
+}
+```
 #### :protocols (Hash)
 URL protocols to allow in specific attributes. If an attribute is listed here

data/lib/sanitize.rb CHANGED Viewed

@@ -19,6 +19,20 @@ require_relative 'sanitize/transformers/clean_element'
 class Sanitize
   attr_reader :config
+  # Matches one or more control characters that should be removed from HTML
+  # before parsing, as defined by the HTML living standard.
+  #
+  # -   https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
+  # -   https://infra.spec.whatwg.org/#control
+  REGEX_HTML_CONTROL_CHARACTERS = /[\u0001-\u0008\u000b\u000e-\u001f\u007f-\u009f]+/u
+  # Matches one or more non-characters that should be removed from HTML before
+  # parsing, as defined by the HTML living standard.
+  #
+  # -   https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
+  # -   https://infra.spec.whatwg.org/#noncharacter
+  REGEX_HTML_NON_CHARACTERS = /[\ufdd0-\ufdef\ufffe\uffff\u{1fffe}\u{1ffff}\u{2fffe}\u{2ffff}\u{3fffe}\u{3ffff}\u{4fffe}\u{4ffff}\u{5fffe}\u{5ffff}\u{6fffe}\u{6ffff}\u{7fffe}\u{7ffff}\u{8fffe}\u{8ffff}\u{9fffe}\u{9ffff}\u{afffe}\u{affff}\u{bfffe}\u{bffff}\u{cfffe}\u{cffff}\u{dfffe}\u{dffff}\u{efffe}\u{effff}\u{ffffe}\u{fffff}\u{10fffe}\u{10ffff}]+/u
   # Matches an attribute value that could be treated by a browser as a URL
   # with a protocol prefix, such as "http:" or "javascript:". Any string of zero
   # or more characters followed by a colon is considered a match, even if the
@@ -26,11 +40,12 @@ class Sanitize
   # IE6 and Opera will still parse).
   REGEX_PROTOCOL = /\A\s*([^\/#]*?)(?:\:|&#0*58|&#x0*3a)/i
-  # Matches Unicode characters that should be stripped from HTML before passing
-  # it to the parser.
+  # Matches one or more characters that should be stripped from HTML before
+  # parsing. This is a combination of `REGEX_HTML_CONTROL_CHARACTERS` and
+  # `REGEX_HTML_NON_CHARACTERS`.
   #
-  # http://www.w3.org/TR/unicode-xml/#Charlist
-  REGEX_UNSUITABLE_CHARS = /[\u0000\u0340\u0341\u17a3\u17d3\u2028\u2029\u202a-\u202e\u206a-\u206f\ufff9-\ufffb\ufeff\ufffc\u{1d173}-\u{1d17a}\u{e0000}-\u{e007f}]/u
+  # https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
+  REGEX_UNSUITABLE_CHARS = /(?:#{REGEX_HTML_CONTROL_CHARACTERS}|#{REGEX_HTML_NON_CHARACTERS})/u
   #--
   # Class Methods
@@ -108,7 +123,7 @@ class Sanitize
   def document(html)
     return '' unless html
-    doc = Nokogiri::HTML5.parse(preprocess(html))
+    doc = Nokogiri::HTML5.parse(preprocess(html), **@config[:parser_options])
     node!(doc)
     to_html(doc)
   end
@@ -120,8 +135,7 @@ class Sanitize
   def fragment(html)
     return '' unless html
-    html = preprocess(html)
-    frag  = Nokogiri::HTML5.fragment(html)
+    frag = Nokogiri::HTML5.fragment(preprocess(html), **@config[:parser_options])
     node!(frag)
     to_html(frag)
   end

data/lib/sanitize/config/default.rb CHANGED Viewed

@@ -56,6 +56,10 @@ class Sanitize
       # that all HTML will be stripped).
       :elements => [],
+      # HTML parsing options to pass to Nokogumbo.
+      # https://github.com/rubys/nokogumbo/tree/v2.0.1#parsing-options
+      :parser_options => {},
       # URL handling protocols to allow in specific attributes. By default, no
       # protocols are allowed. Use :relative in place of a protocol if you want
       # to allow relative URLs sans protocol.

data/lib/sanitize/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # encoding: utf-8
 class Sanitize
-  VERSION = '5.0.0'
+  VERSION = '5.1.0'
 end

data/test/common.rb CHANGED Viewed

@@ -1,34 +1,3 @@
 # encoding: utf-8
-gem 'minitest'
 require 'minitest/autorun'
 require_relative '../lib/sanitize'
-# Helper to stub an instance method. Shamelessly stolen from
-# https://github.com/codeodor/minitest-stub_any_instance/
-class Object
-  def self.stub_instance(name, value, &block)
-    old_method = "__stubbed_method_#{name}__"
-    class_eval do
-      alias_method old_method, name
-      define_method(name) do |*args|
-        if value.respond_to?(:call) then
-          value.call(*args)
-        else
-          value
-        end
-      end
-    end
-    yield
-  ensure
-    class_eval do
-      undef_method name
-      alias_method name, old_method
-      undef_method old_method
-    end
-  end
-end

data/test/test_malicious_html.rb CHANGED Viewed

@@ -166,12 +166,19 @@ describe 'Malicious HTML' do
         input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
         it 'should escape unsafe characters in attributes' do
-          output = %[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>]
-          @s.fragment(input).must_equal(output)
+          # This uses Nokogumbo's HTML-compliant serializer rather than
+          # libxml2's.
+          @s.fragment(input).
+            must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
+          # This uses the not-quite-standards-compliant libxml2 serializer via
+          # Nokogiri, so the output may be a little different as of Nokogiri
+          # 1.10.2 when using Nokogiri's vendored libxml2 due to this patch:
+          # https://github.com/sparklemotion/nokogiri/commit/4852e43cb6039e26d8c51af78621e539cbf46c5d
           fragment = Nokogiri::HTML.fragment(input)
           @s.node!(fragment)
-          fragment.to_html.must_equal(output)
+          fragment.to_html.
+            must_equal(%[<#{tag_name} #{attr_name}="examp&lt;!--%22%20onmouseover=alert(1)&gt;--&gt;le.com">foo</#{tag_name}>])
         end
         it 'should round-trip to the same output' do
@@ -184,11 +191,19 @@ describe 'Malicious HTML' do
         input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
         it 'should not escape characters unnecessarily' do
-          @s.fragment(input).must_equal(%[<#{tag_name} #{attr_name}="examp<!--&quot; onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
+          # This uses Nokogumbo's HTML-compliant serializer rather than
+          # libxml2's.
+          @s.fragment(input).
+            must_equal(%[<#{tag_name} #{attr_name}="examp<!--&quot; onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
+          # This uses the not-quite-standards-compliant libxml2 serializer via
+          # Nokogiri, so the output may be a little different as of Nokogiri
+          # 1.10.2 when using Nokogiri's vendored libxml2 due to this patch:
+          # https://github.com/sparklemotion/nokogiri/commit/4852e43cb6039e26d8c51af78621e539cbf46c5d
           fragment = Nokogiri::HTML.fragment(input)
           @s.node!(fragment)
-          fragment.to_html.must_equal(%[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>])
+          fragment.to_html.
+            must_equal(%[<#{tag_name} #{attr_name}='examp&lt;!--" onmouseover=alert(1)&gt;--&gt;le.com'>foo</#{tag_name}>])
         end
         it 'should round-trip to the same output' do

data/test/test_sanitize.rb CHANGED Viewed

@@ -37,6 +37,44 @@ describe 'Sanitize' do
       it 'should not choke on frozen documents' do
         @s.document('<!doctype html><html><b>foo</b>'.freeze).must_equal "<html>foo</html>"
       end
+      it 'should normalize newlines' do
+        @s.document("a\r\n\n\r\r\r\nz").must_equal "<html>a\n\n\n\n\nz</html>"
+      end
+      it 'should strip control characters (except ASCII whitespace)' do
+        sample_control_chars = "\u0001\u0008\u000b\u000e\u001f\u007f\u009f"
+        whitespace = "\t\n\f\u0020"
+        @s.document("a#{sample_control_chars}#{whitespace}z").must_equal "<html>a#{whitespace}z</html>"
+      end
+      it 'should strip non-characters' do
+        sample_non_chars = "\ufdd0\ufdef\ufffe\uffff\u{1fffe}\u{1ffff}\u{2fffe}\u{2ffff}\u{3fffe}\u{3ffff}\u{4fffe}\u{4ffff}\u{5fffe}\u{5ffff}\u{6fffe}\u{6ffff}\u{7fffe}\u{7ffff}\u{8fffe}\u{8ffff}\u{9fffe}\u{9ffff}\u{afffe}\u{affff}\u{bfffe}\u{bffff}\u{cfffe}\u{cffff}\u{dfffe}\u{dffff}\u{efffe}\u{effff}\u{ffffe}\u{fffff}\u{10fffe}\u{10ffff}"
+        @s.document("a#{sample_non_chars}z").must_equal "<html>az</html>"
+      end
+      describe 'when html body exceeds Nokogumbo::DEFAULT_MAX_TREE_DEPTH' do
+        let(:content) do
+          content = nest_html_content('<b>foo</b>', Nokogumbo::DEFAULT_MAX_TREE_DEPTH)
+          "<html>#{content}</html>"
+        end
+        it 'raises an ArgumentError exception' do
+          assert_raises ArgumentError do
+            @s.document(content)
+          end
+        end
+        describe 'and :max_tree_depth of -1 is supplied in :parser_options' do
+          before do
+            @s = Sanitize.new(elements: ['html'], parser_options: { max_tree_depth: -1 })
+          end
+          it 'does not raise an ArgumentError exception' do
+            @s.document(content).must_equal '<html>foo</html>'
+          end
+        end
+      end
     end
     describe '#fragment' do
@@ -61,6 +99,44 @@ describe 'Sanitize' do
       it 'should not choke on frozen fragments' do
         @s.fragment('<b>foo</b>'.freeze).must_equal 'foo'
       end
+      it 'should normalize newlines' do
+        @s.fragment("a\r\n\n\r\r\r\nz").must_equal "a\n\n\n\n\nz"
+      end
+      it 'should strip control characters (except ASCII whitespace)' do
+        sample_control_chars = "\u0001\u0008\u000b\u000e\u001f\u007f\u009f"
+        whitespace = "\t\n\f\u0020"
+        @s.fragment("a#{sample_control_chars}#{whitespace}z").must_equal "a#{whitespace}z"
+      end
+      it 'should strip non-characters' do
+        sample_non_chars = "\ufdd0\ufdef\ufffe\uffff\u{1fffe}\u{1ffff}\u{2fffe}\u{2ffff}\u{3fffe}\u{3ffff}\u{4fffe}\u{4ffff}\u{5fffe}\u{5ffff}\u{6fffe}\u{6ffff}\u{7fffe}\u{7ffff}\u{8fffe}\u{8ffff}\u{9fffe}\u{9ffff}\u{afffe}\u{affff}\u{bfffe}\u{bffff}\u{cfffe}\u{cffff}\u{dfffe}\u{dffff}\u{efffe}\u{effff}\u{ffffe}\u{fffff}\u{10fffe}\u{10ffff}"
+        @s.fragment("a#{sample_non_chars}z").must_equal "az"
+      end
+      describe 'when html body exceeds Nokogumbo::DEFAULT_MAX_TREE_DEPTH' do
+        let(:content) do
+          content = nest_html_content('<b>foo</b>', Nokogumbo::DEFAULT_MAX_TREE_DEPTH)
+          "<body>#{content}</body>"
+        end
+        it 'raises an ArgumentError exception' do
+          assert_raises ArgumentError do
+            @s.fragment(content)
+          end
+        end
+        describe 'and :max_tree_depth of -1 is supplied in :parser_options' do
+          before do
+            @s = Sanitize.new(parser_options: { max_tree_depth: -1 })
+          end
+          it 'does not raise an ArgumentError exception' do
+            @s.fragment(content).must_equal 'foo'
+          end
+        end
+      end
     end
     describe '#node!' do
@@ -85,28 +161,37 @@ describe 'Sanitize' do
   describe 'class methods' do
     describe '.document' do
-      it 'should call #document' do
-        Sanitize.stub_instance(:document, proc {|html| html + ' called' }) do
-          Sanitize.document('<html>foo</html>')
-            .must_equal '<html>foo</html> called'
-        end
+      it 'should sanitize an HTML document with the given config' do
+        html = '<!doctype html><html><b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script></html>'
+        Sanitize.document(html, :elements => ['html'])
+          .must_equal "<html>Lorem ipsum dolor sit amet </html>"
       end
     end
     describe '.fragment' do
-      it 'should call #fragment' do
-        Sanitize.stub_instance(:fragment, proc {|html| html + ' called' }) do
-          Sanitize.fragment('<b>foo</b>').must_equal '<b>foo</b> called'
-        end
+      it 'should sanitize an HTML fragment with the given config' do
+        html = '<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>'
+        Sanitize.fragment(html, :elements => ['strong'])
+          .must_equal 'Lorem ipsum <strong>dolor</strong> sit amet '
       end
     end
     describe '.node!' do
-      it 'should call #node!' do
-        Sanitize.stub_instance(:node!, proc {|input| input + ' called' }) do
-          Sanitize.node!('not really a node').must_equal 'not really a node called'
-        end
+      it 'should sanitize a Nokogiri::XML::Node with the given config' do
+        doc = Nokogiri::HTML5.parse('<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>')
+        frag = doc.fragment
+        doc.xpath('/html/body/node()').each {|node| frag << node }
+        Sanitize.node!(frag, :elements => ['strong'])
+        frag.to_html.must_equal 'Lorem ipsum <strong>dolor</strong> sit amet '
       end
     end
   end
+  private
+  def nest_html_content(html_content, depth)
+    "#{'<span>' * depth}#{html_content}#{'</span>' * depth}"
+  end
 end

data/test/test_sanitize_css.rb CHANGED Viewed

@@ -196,26 +196,53 @@ describe 'Sanitize::CSS' do
   describe 'class methods' do
     describe '.properties' do
-      it 'should call #properties' do
-        Sanitize::CSS.stub_instance(:properties, proc {|css| css + 'bar' }) do
-          Sanitize::CSS.properties('foo').must_equal 'foobar'
-        end
+      it 'should sanitize CSS properties with the given config' do
+        css = 'background: #fff; width: expression(alert("hi"));'
+        Sanitize::CSS.properties(css).must_equal ' '
+        Sanitize::CSS.properties(css, Sanitize::Config::RELAXED[:css]).must_equal 'background: #fff; '
+        Sanitize::CSS.properties(css, :properties => %w[background color width]).must_equal 'background: #fff; '
       end
     end
     describe '.stylesheet' do
-      it 'should call #stylesheet' do
-        Sanitize::CSS.stub_instance(:stylesheet, proc {|css| css + 'bar' }) do
-          Sanitize::CSS.stylesheet('foo').must_equal 'foobar'
-        end
+      it 'should sanitize a CSS stylesheet with the given config' do
+        css = %[
+          /* Yay CSS! */
+          .foo { color: #fff; }
+          #bar { background: url(yay.jpg); }
+          @media screen (max-width:480px) {
+            .foo { width: 400px; }
+            #bar:not(.baz) { height: 100px; }
+          }
+        ].strip
+        Sanitize::CSS.stylesheet(css).strip.must_equal %[
+          .foo {  }
+          #bar {  }
+        ].strip
+        Sanitize::CSS.stylesheet(css, Sanitize::Config::RELAXED[:css]).must_equal css
+        Sanitize::CSS.stylesheet(css, :properties => %w[background color width]).strip.must_equal %[
+          .foo { color: #fff; }
+          #bar {  }
+        ].strip
       end
     end
     describe '.tree!' do
-      it 'should call #tree!' do
-        Sanitize::CSS.stub_instance(:tree!, proc {|tree| tree + 'bar' }) do
-          Sanitize::CSS.tree!('foo').must_equal 'foobar'
-        end
+      it 'should sanitize a Crass CSS parse tree with the given config' do
+        tree = Crass.parse(String.new("@import url(foo.css);\n") <<
+          ".foo { background: #fff; font: 16pt 'Comic Sans MS'; }\n" <<
+          "#bar { top: 125px; background: green; }")
+        Sanitize::CSS.tree!(tree, :properties => %w[background color width]).must_be_same_as tree
+        Crass::Parser.stringify(tree).must_equal String.new("\n") <<
+            ".foo { background: #fff;  }\n" <<
+            "#bar {  background: green; }"
       end
     end
   end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: sanitize
 version: !ruby/object:Gem::Version
-  version: 5.0.0
+  version: 5.1.0
 platform: ruby
 authors:
 - Ryan Grove
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-10-15 00:00:00.000000000 Z
+date: 2019-09-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: crass
@@ -116,7 +116,6 @@ files:
 - test/test_sanitize.rb
 - test/test_sanitize_css.rb
 - test/test_transformers.rb
-- test/test_unicode.rb
 homepage: https://github.com/rgrove/sanitize/
 licenses:
 - MIT
@@ -136,8 +135,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: 1.2.0
 requirements: []
-rubyforge_project:
-rubygems_version: 2.7.6
+rubygems_version: 3.0.3
 signing_key:
 specification_version: 4
 summary: Whitelist-based HTML and CSS sanitizer.

data/test/test_unicode.rb DELETED Viewed

@@ -1,95 +0,0 @@
-# encoding: utf-8
-require_relative 'common'
-describe 'Unicode' do
-  make_my_diffs_pretty!
-  parallelize_me!
-  # http://www.w3.org/TR/unicode-xml/#Charlist
-  describe 'Unsuitable characters' do
-    before do
-      @s = Sanitize.new(Sanitize::Config::RELAXED)
-    end
-    it 'should not modify the input string' do
-      fragment = "a\u0340b\u0341c"
-      document = "a\u0340b\u0341c"
-      @s.document(document)
-      @s.fragment(fragment)
-      fragment.must_equal "a\u0340b\u0341c"
-      document.must_equal "a\u0340b\u0341c"
-    end
-    it 'should strip deprecated grave and acute clones' do
-      @s.document("a\u0340b\u0341c").must_equal "<html><head></head><body>abc</body></html>"
-      @s.fragment("a\u0340b\u0341c").must_equal 'abc'
-    end
-    it 'should strip deprecated Khmer characters' do
-      @s.document("a\u17a3b\u17d3c").must_equal "<html><head></head><body>abc</body></html>"
-      @s.fragment("a\u17a3b\u17d3c").must_equal 'abc'
-    end
-    it 'should strip line and paragraph separator punctuation' do
-      @s.document("a\u2028b\u2029c").must_equal "<html><head></head><body>abc</body></html>"
-      @s.fragment("a\u2028b\u2029c").must_equal 'abc'
-    end
-    it 'should strip bidi embedding control characters' do
-      @s.document("a\u202ab\u202bc\u202cd\u202de\u202e")
-        .must_equal "<html><head></head><body>abcde</body></html>"
-      @s.fragment("a\u202ab\u202bc\u202cd\u202de\u202e")
-        .must_equal 'abcde'
-    end
-    it 'should strip deprecated symmetric swapping characters' do
-      @s.document("a\u206ab\u206bc").must_equal "<html><head></head><body>abc</body></html>"
-      @s.fragment("a\u206ab\u206bc").must_equal 'abc'
-    end
-    it 'should strip deprecated Arabic form shaping characters' do
-      @s.document("a\u206cb\u206dc").must_equal "<html><head></head><body>abc</body></html>"
-      @s.fragment("a\u206cb\u206dc").must_equal 'abc'
-    end
-    it 'should strip deprecated National digit shape characters' do
-      @s.document("a\u206eb\u206fc").must_equal "<html><head></head><body>abc</body></html>"
-      @s.fragment("a\u206eb\u206fc").must_equal 'abc'
-    end
-    it 'should strip interlinear annotation characters' do
-      @s.document("a\ufff9b\ufffac\ufffb").must_equal "<html><head></head><body>abc</body></html>"
-      @s.fragment("a\ufff9b\ufffac\ufffb").must_equal 'abc'
-    end
-    it 'should strip BOM/zero-width non-breaking space characters' do
-      @s.document("a\ufeffbc").must_equal "<html><head></head><body>abc</body></html>"
-      @s.fragment("a\ufeffbc").must_equal 'abc'
-    end
-    it 'should strip object replacement characters' do
-      @s.document("a\ufffcbc").must_equal "<html><head></head><body>abc</body></html>"
-      @s.fragment("a\ufffcbc").must_equal 'abc'
-    end
-    it 'should strip musical notation scoping characters' do
-      @s.document("a\u{1d173}b\u{1d174}c\u{1d175}d\u{1d176}e\u{1d177}f\u{1d178}g\u{1d179}h\u{1d17a}")
-        .must_equal "<html><head></head><body>abcdefgh</body></html>"
-      @s.fragment("a\u{1d173}b\u{1d174}c\u{1d175}d\u{1d176}e\u{1d177}f\u{1d178}g\u{1d179}h\u{1d17a}")
-        .must_equal 'abcdefgh'
-    end
-    it 'should strip language tag code point characters' do
-      str = String.new 'a'
-      (0xE0000..0xE007F).each {|n| str << [n].pack('U') }
-      str << 'b'
-      @s.document(str).must_equal "<html><head></head><body>ab</body></html>"
-      @s.fragment(str).must_equal 'ab'
-    end
-  end
-end