RubyGems - postrank-uri - Versions diffs - 1.0.18 → 1.0.20 - Mend

postrank-uri 1.0.18 → 1.0.20

Files changed (11) hide show

checksums.yaml +4 -4
data/.gitignore +1 -0
data/Appraisals +19 -0
data/LICENSE +21 -0
data/README.md +26 -1
data/Rakefile +3 -0
data/lib/postrank-uri.rb +12 -7
data/lib/postrank-uri/version.rb +1 -1
data/postrank-uri.gemspec +6 -3
data/spec/postrank-uri_spec.rb +163 -148
metadata +73 -19

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 1f9a235d0c2287732278b672997027c7d4093d0f
-  data.tar.gz: 8c91e4b1c787c43e6ea2ccc51593be3809746fa7
+  metadata.gz: 2a453df060f3bb0d7ea04c7031d17f9e2039e8fc
+  data.tar.gz: fc665791c45e60179c706e9e0e2b3c8c50a58f73
 SHA512:
-  metadata.gz: c702922f05473c762d7223777eb48641db8c4b0e156539c729433f3aa9174b010bce707801ee8c7ec310e4181edb3402c77131d3a66b0bdc02e99f87c1d9982a
-  data.tar.gz: 06040dead1c6348febb64211e2787216c287c87baa09555947e03a4bea09b7293ed33faed08851e28aaa2ff08389d121a48746cc6a6f3af6c7e023f75db1917a
+  metadata.gz: 4c6eec4d6e64e4c400d1ba9e2508a6bca95008452d3e64f22511b56236892a12afd582a7d8be28b5044e0e03f709459f5bba238359ac6408d42221912e8970a3
+  data.tar.gz: 7816c251f9ba449f2f3ea3c8e8e4fb1f80c2e76f8fd004c2c73021b3e47fc7cc1068467f90e17986f041ea948c375b24ce7248a837d8f0d13dedc1f0c4773ed1

data/.gitignore CHANGED

@@ -1,2 +1,3 @@
 pkg
 Gemfile.lock
+/gemfiles

data/Appraisals ADDED

@@ -0,0 +1,19 @@
+appraise "nokogiri-1.7" do
+  gem "nokogiri", "~> 1.7.0"
+end
+appraise "nokogiri-1.6" do
+  gem "nokogiri", "~> 1.6.1"
+end
+appraise "addressable-2.3" do
+  gem "addressable", "~> 2.3.0"
+end
+appraise "addressable-2.4" do
+  gem "addressable", "~> 2.4.0"
+end
+appraise "addressable-2.5" do
+  gem "addressable", "~> 2.5.0"
+end

data/LICENSE ADDED

@@ -0,0 +1,21 @@
+The MIT License (MIT)
+Copyright (c) 2011 Ilya Grigorik
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

data/README.md CHANGED

@@ -37,4 +37,29 @@ In a nutshell, we need to make sure that creative cases like the ones below all
 As part of URI canonicalization the library will remove common tracking parameters from Google Analytics and several other providers. Beyond that, host-specific rules are also applied. For example, nytimes.com likes to add a 'partner' query parameter for tracking purposes, but which has no effect on the content - hence, it is removed from the URI. For full list, see the c14n.yml file.
-Detecting "duplicate URLs" is a hard problem to solve (expensive in all senses), instead we are compiling a manually assembled database. If you find cases which are missing, please do report them, or send us a pull request!
+Detecting "duplicate URLs" is a hard problem to solve (expensive in all senses), instead we are compiling a manually assembled database. If you find cases which are missing, please do report them, or send us a pull request!
+## Development
+### Setup
+```
+bundle install
+```
+### Running tests
+```
+bundle exec rake
+```
+### Running dependency appraisals
+To verify `postrank-uri` works with different versions of its runtime dependencies you can run:
+```
+bundle exec appraisal install
+bundle exec rake appraisal
+```
+This will execute the test suite with different versions of the dependencies.

data/Rakefile CHANGED

@@ -3,3 +3,6 @@ Bundler::GemHelper.install_tasks
 require 'rspec/core/rake_task'
 RSpec::Core::RakeTask.new(:spec)
+task :default => :spec
+require 'appraisal'

data/lib/postrank-uri.rb CHANGED

@@ -1,4 +1,4 @@
+# encoding: utf-8
 require 'addressable/uri'
 require 'digest/md5'
 require 'nokogiri'
@@ -9,7 +9,7 @@ module Addressable
   class URI
     def domain
       host = self.host
-      (host && PublicSuffix.valid?(host)) ? PublicSuffix.parse(host).domain : nil
+      (host && PublicSuffix.valid?(host, default_rule: nil)) ? PublicSuffix.parse(host).domain : nil
     end
     def normalized_query
@@ -86,8 +86,9 @@ module PostRank
           )
         }iox;
+    URIREGEX[:reserved_characters] = /%3F|%26/i
     URIREGEX[:escape]   = /([^ a-zA-Z0-9_.-]+)/x
-    URIREGEX[:unescape] = /((?:%[0-9a-fA-F]{2})+)/x
+    URIREGEX[:unescape] = /(%[0-9a-fA-F]{2})/x
     URIREGEX.each_pair{|k,v| v.freeze }
     module_function
@@ -97,7 +98,7 @@ module PostRank
       urls = []
       text.to_s.scan(URIREGEX[:valid_url]) do |all, before, url, protocol, domain, path, query|
         # Only extract the URL if the domain is valid
-        if PublicSuffix.valid?(domain)
+        if PublicSuffix.valid?(domain, default_rule: nil)
           url = clean(url)
           urls.push url.to_s
         end
@@ -131,8 +132,12 @@ module PostRank
     def unescape(uri)
       u = parse(uri)
       u.query = u.query.tr('+', ' ') if u.query
-      u.to_s.gsub(URIREGEX[:unescape]) do
-        [$1.delete('%')].pack('H*')
+      u.to_s.gsub(URIREGEX[:unescape]) do |encoded|
+        if encoded.match? URIREGEX[:reserved_characters]
+          encoded
+        else
+          [encoded.delete('%')].pack('H*')
+        end
       end
     end
@@ -225,7 +230,7 @@ module PostRank
       cleaned_uri = clean(uri, :raw => true)
       if host = cleaned_uri.host
-        is_valid = PublicSuffix.valid?(Addressable::IDNA.to_unicode(host))
+        is_valid = PublicSuffix.valid?(Addressable::IDNA.to_unicode(host), default_rule: nil)
       end
       is_valid

data/lib/postrank-uri/version.rb CHANGED

@@ -1,5 +1,5 @@
 module PostRank
   module URI
-    VERSION = "1.0.18"
+    VERSION = "1.0.20"
   end
 end

data/postrank-uri.gemspec CHANGED

@@ -11,14 +11,17 @@ Gem::Specification.new do |s|
   s.homepage    = "http://github.com/postrank-labs/postrank-uri"
   s.summary     = "URI normalization, c14n, escaping, and extraction"
   s.description = s.summary
+  s.license     = 'MIT'
   s.rubyforge_project = "postrank-uri"
-  s.add_dependency "addressable",   "~> 2.3.0"
-  s.add_dependency "public_suffix", "~> 1.1.3"
-  s.add_dependency "nokogiri",      "~> 1.6.1"
+  s.add_dependency "addressable",   ">= 2.3.0", "< 2.6"
+  s.add_dependency "public_suffix", ">= 2.0.0", "< 2.1"
+  s.add_dependency "nokogiri",      ">= 1.6.1", "< 1.8"
+  s.add_development_dependency "rake"
   s.add_development_dependency "rspec"
+  s.add_development_dependency "appraisal", ">= 2.0.0", "< 3.0"
   s.files         = `git ls-files`.split("\n")
   s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")

data/spec/postrank-uri_spec.rb CHANGED

@@ -3,103 +3,101 @@
 require 'helper'
 describe PostRank::URI do
-  let(:igvita) { 'http://igvita.com/' }
   context "escaping" do
-    it "should escape PostRank::URI string" do
-      PostRank::URI.escape('id=1').should == 'id%3D1'
+    it "escapes PostRank::URI string" do
+      expect(PostRank::URI.escape('id=1')).to eq('id%3D1')
     end
-    it "should escape spaces as %20's" do
-      PostRank::URI.escape('id= 1').should match('%20')
+    it "escapes spaces as %20's" do
+      expect(PostRank::URI.escape('id= 1')).to match('%20')
     end
   end
   context "unescape" do
-    it "should unescape PostRank::URI" do
-      PostRank::URI.unescape(PostRank::URI.escape('id=1')).should == 'id=1'
+    it "unescapes PostRank::URI" do
+      expect(PostRank::URI.unescape(PostRank::URI.escape('id=1'))).to eq('id=1')
     end
-    it "should unescape PostRank::URI with spaces" do
-      PostRank::URI.unescape(PostRank::URI.escape('id= 1')).should == 'id= 1'
+    it "unescapes PostRank::URI with spaces" do
+      expect(PostRank::URI.unescape(PostRank::URI.escape('id= 1'))).to eq('id= 1')
     end
     context "accept improperly escaped PostRank::URI strings" do
       # See http://tools.ietf.org/html/rfc3986#section-2.3
-      it "should unescape PostRank::URI with spaces encoded as '+'" do
-        PostRank::URI.unescape('?id=+1').should == '?id= 1'
+      it "unescapes PostRank::URI with spaces encoded as '+'" do
+        expect(PostRank::URI.unescape('?id=+1')).to eq('?id= 1')
       end
-      it "should unescape PostRank::URI with spaces encoded as '+'" do
-        PostRank::URI.unescape('?id%3D+1').should == '?id= 1'
+      it "unescapes PostRank::URI with spaces encoded as '+'" do
+        expect(PostRank::URI.unescape('?id%3D+1')).to eq('?id= 1')
       end
-      it "should unescape PostRank::URI with spaces encoded as %20" do
-        PostRank::URI.unescape('?id=%201').should == '?id= 1'
+      it "unescapes PostRank::URI with spaces encoded as %20" do
+        expect(PostRank::URI.unescape('?id=%201')).to eq('?id= 1')
       end
-      it "should not unescape '+' to spaces in paths" do
-        PostRank::URI.unescape('/foo+bar?id=foo+bar').should == '/foo+bar?id=foo bar'
+      it "does not unescape '+' to spaces in paths" do
+        expect(PostRank::URI.unescape('/foo+bar?id=foo+bar')).to eq('/foo+bar?id=foo bar')
       end
     end
   end
   context "normalize" do
+    let(:igvita) { 'http://igvita.com/' }
     def n(uri)
       PostRank::URI.normalize(uri).to_s
     end
-    it "should normalize paths in PostRank::URIs" do
-      n('http://igvita.com/').should == igvita
-      n('http://igvita.com').to_s.should == igvita
-      n('http://igvita.com///').should == igvita
+    it "normalizes paths in PostRank::URIs" do
+      expect(n('http://igvita.com/')).to eq(igvita)
+      expect(n('http://igvita.com').to_s).to eq(igvita)
+      expect(n('http://igvita.com///')).to eq(igvita)
-      n('http://igvita.com/../').should == igvita
-      n('http://igvita.com/a/b/../../').should == igvita
-      n('http://igvita.com/a/b/../..').should == igvita
+      expect(n('http://igvita.com/../')).to eq(igvita)
+      expect(n('http://igvita.com/a/b/../../')).to eq(igvita)
+      expect(n('http://igvita.com/a/b/../..')).to eq(igvita)
     end
-    it "should normalize query strings in PostRank::URIs" do
-      n('http://igvita.com/?').should == igvita
-      n('http://igvita.com?').should == igvita
-      n('http://igvita.com/a/../?').should == igvita
+    it "normalizes query strings in PostRank::URIs" do
+      expect(n('http://igvita.com/?')).to eq(igvita)
+      expect(n('http://igvita.com?')).to eq(igvita)
+      expect(n('http://igvita.com/a/../?')).to eq(igvita)
     end
-    it "should normalize anchors in PostRank::URIs" do
-      n('http://igvita.com#test').should == igvita
-      n('http://igvita.com#test#test').should == igvita
-      n('http://igvita.com/a/../?#test').should == igvita
+    it "normalizes anchors in PostRank::URIs" do
+      expect(n('http://igvita.com#test')).to eq(igvita)
+      expect(n('http://igvita.com#test#test')).to eq(igvita)
+      expect(n('http://igvita.com/a/../?#test')).to eq(igvita)
     end
-    it "should clean whitespace in PostRank::URIs" do
-      n('http://igvita.com/a/../?  ').should == igvita
-      n('http://igvita.com/a/../? #test').should == igvita
-      n('http://igvita.com/ /../').should == igvita
+    it "cleans whitespace in PostRank::URIs" do
+      expect(n('http://igvita.com/a/../?  ')).to eq(igvita)
+      expect(n('http://igvita.com/a/../? #test')).to eq(igvita)
+      expect(n('http://igvita.com/ /../')).to eq(igvita)
     end
-    it "should default to http scheme if missing" do
-      n('igvita.com').should == igvita
-      n('https://test.com/').to_s.should == 'https://test.com/'
+    it "defaults to http scheme if missing" do
+      expect(n('igvita.com')).to eq(igvita)
+      expect(n('https://test.com/').to_s).to eq('https://test.com/')
     end
-    it "should downcase hostname" do
-      n('IGVITA.COM').should == igvita
-      n('IGVITA.COM/ABC').should == (igvita + "ABC")
+    it "downcases the hostname" do
+      expect(n('IGVITA.COM')).to eq(igvita)
+      expect(n('IGVITA.COM/ABC')).to eq(igvita + "ABC")
     end
-    it "should remove trailing slash on paths" do
-      n('http://igvita.com/').should == 'http://igvita.com/'
+    it "removes trailing slash on paths" do
+      expect(n('http://igvita.com/')).to eq('http://igvita.com/')
-      n('http://igvita.com/a').should == 'http://igvita.com/a'
-      n('http://igvita.com/a/').should == 'http://igvita.com/a'
+      expect(n('http://igvita.com/a')).to eq('http://igvita.com/a')
+      expect(n('http://igvita.com/a/')).to eq('http://igvita.com/a')
-      n('http://igvita.com/a/b').should == 'http://igvita.com/a/b'
-      n('http://igvita.com/a/b/').should == 'http://igvita.com/a/b'
+      expect(n('http://igvita.com/a/b')).to eq('http://igvita.com/a/b')
+      expect(n('http://igvita.com/a/b/')).to eq('http://igvita.com/a/b')
     end
   end
   context "canonicalization" do
@@ -109,60 +107,60 @@ describe PostRank::URI do
     context "query parameters" do
       it "should handle nester parameters" do
-        c('igvita.com/?id=a&utm_source=a').should == 'http://igvita.com/?id=a'
+        expect(c('igvita.com/?id=a&utm_source=a')).to eq('http://igvita.com/?id=a')
       end
-      it "should preserve order of parameters" do
+      it "preserves the order of parameters" do
         url = 'http://a.com/?'+('a'..'z').to_a.shuffle.map {|e| "#{e}=#{e}"}.join("&")
-        c(url).should == url
+        expect(c(url)).to eq(url)
       end
-      it "should remove Google Analytics parameters" do
-        c('igvita.com/?id=a&utm_source=a').should == 'http://igvita.com/?id=a'
-        c('igvita.com/?id=a&utm_source=a&utm_valid').should == 'http://igvita.com/?id=a&utm_valid'
+      it "removes Google Analytics parameters" do
+        expect(c('igvita.com/?id=a&utm_source=a')).to eq('http://igvita.com/?id=a')
+        expect(c('igvita.com/?id=a&utm_source=a&utm_valid')).to eq('http://igvita.com/?id=a&utm_valid')
       end
-      it "should remove awesm/sms parameters" do
-        c('igvita.com/?id=a&utm_source=a&awesm=b').should == 'http://igvita.com/?id=a'
-        c('igvita.com/?id=a&sms_ss=a').should == 'http://igvita.com/?id=a'
+      it "removes awesm/sms parameters" do
+        expect(c('igvita.com/?id=a&utm_source=a&awesm=b')).to eq('http://igvita.com/?id=a')
+        expect(c('igvita.com/?id=a&sms_ss=a')).to eq('http://igvita.com/?id=a')
       end
-      it "should remove PHPSESSID parameter" do
-        c('http://www.nachi.org/forum?PHPSESSID=9ee2fb10b7274ef2b15d1d4006b8c8dd').should == 'http://www.nachi.org/forum?'
-        c('http://www.nachi.org/forum/?PHPSESSID=9ee2fb10b7274ef2b15d1d4006b8c8dd').should == 'http://www.nachi.org/forum/?'
-        c('http://www.nachi.org/forum?id=123&PHPSESSID=9ee2fb10b7274ef2b15d1d4006b8c8dd').should == 'http://www.nachi.org/forum?id=123'
+      it "removes PHPSESSID parameter" do
+        expect(c('http://www.nachi.org/forum?PHPSESSID=9ee2fb10b7274ef2b15d1d4006b8c8dd')).to eq('http://www.nachi.org/forum?')
+        expect(c('http://www.nachi.org/forum/?PHPSESSID=9ee2fb10b7274ef2b15d1d4006b8c8dd')).to eq('http://www.nachi.org/forum/?')
+        expect(c('http://www.nachi.org/forum?id=123&PHPSESSID=9ee2fb10b7274ef2b15d1d4006b8c8dd')).to eq('http://www.nachi.org/forum?id=123')
       end
     end
     context "hashbang" do
-      it "should rewrite twitter links to crawlable versions" do
-        c('http://twitter.com/#!/igrigorik').should == 'http://twitter.com/igrigorik'
-        c('http://twitter.com/#!/a/statuses/1').should == 'http://twitter.com/a/statuses/1'
-        c('http://nontwitter.com/#!/a/statuses/1').should == 'http://nontwitter.com/#!/a/statuses/1'
+      it "rewrites twitter links to crawlable versions" do
+        expect(c('http://twitter.com/#!/igrigorik')).to eq('http://twitter.com/igrigorik')
+        expect(c('http://twitter.com/#!/a/statuses/1')).to eq('http://twitter.com/a/statuses/1')
+        expect(c('http://nontwitter.com/#!/a/statuses/1')).to eq('http://nontwitter.com/#!/a/statuses/1')
       end
     end
     context "tumblr" do
-      it "should strip slug" do
-        c('http://test.tumblr.com/post/4533459403/some-text').should == 'http://test.tumblr.com/post/4533459403/'
-        c('http://tumblr.com/xjl2evo3hh').should == 'http://tumblr.com/xjl2evo3hh'
+      it "strips the slug" do
+        expect(c('http://test.tumblr.com/post/4533459403/some-text')).to eq('http://test.tumblr.com/post/4533459403/')
+        expect(c('http://tumblr.com/xjl2evo3hh')).to eq('http://tumblr.com/xjl2evo3hh')
       end
     end
     context "embedded links" do
-      it "should extract embedded redirects from Google News" do
+      it "extracts embedded redirects from Google News" do
         u = c('http://news.google.com/news/url?sa=t&fd=R&&url=http://www.ctv.ca/CTVNews/Politics/20110111/')
-        u.should == 'http://www.ctv.ca/CTVNews/Politics/20110111'
+        expect(u).to eq('http://www.ctv.ca/CTVNews/Politics/20110111')
       end
-      it "should extract embedded redirects from xfruits.com" do
+      it "extracts embedded redirects from xfruits.com" do
         u = c('http://xfruits.com/MrGroar/?url=http%3A%2F%2Faap.lesroyaumes.com%2Fdepeches%2Fdepeche351820908.html')
-        u.should == 'http://aap.lesroyaumes.com/depeches/depeche351820908.html'
+        expect(u).to eq('http://aap.lesroyaumes.com/depeches/depeche351820908.html')
       end
-      it "should extract embedded redirects from MySpace" do
+      it "extracts embedded redirects from MySpace" do
         u = c('http://www.myspace.com/Modules/PostTo/Pages/?u=http%3A%2F%2Fghanaian-chronicle.com%2Fnews%2Fother-news%2Fcanadian-high-commissioner-urges-media%2F&t=Canadian%20High%20Commissioner%20urges%20media')
-        u.should == 'http://ghanaian-chronicle.com/news/other-news/canadian-high-commissioner-urges-media'
+        expect(u).to eq('http://ghanaian-chronicle.com/news/other-news/canadian-high-commissioner-urges-media')
       end
     end
   end
@@ -172,25 +170,42 @@ describe PostRank::URI do
       PostRank::URI.clean(uri)
     end
-    it "should unescape, c14n and normalize" do
-      c('http://igvita.com/?id=1').should == 'http://igvita.com/?id=1'
-      c('igvita.com/?id=1').should == 'http://igvita.com/?id=1'
+    it "unescapes, canonicalizes and normalizes" do
+      expect(c('http://igvita.com/?id=1')).to eq('http://igvita.com/?id=1')
+      expect(c('igvita.com/?id=1')).to eq('http://igvita.com/?id=1')
-      c('http://igvita.com/?id= 1').should == 'http://igvita.com/?id=%201'
-      c('http://igvita.com/?id=+1').should == 'http://igvita.com/?id=%201'
-      c('http://igvita.com/?id%3D%201').should == 'http://igvita.com/?id=%201'
+      expect(c('http://igvita.com/?id= 1')).to eq('http://igvita.com/?id=%201')
+      expect(c('http://igvita.com/?id=+1')).to eq('http://igvita.com/?id=%201')
+      expect(c('http://igvita.com/?id%3D%201')).to eq('http://igvita.com/?id=%201')
-      c('igvita.com/a/..?id=1&utm_source=a&awesm=b#c').should == 'http://igvita.com/?id=1'
+      expect(c('igvita.com/a/..?id=1&utm_source=a&awesm=b#c')).to eq('http://igvita.com/?id=1')
-      c('igvita.com?id=<>').should == 'http://igvita.com/?id=%3C%3E'
-      c('igvita.com?id="').should == 'http://igvita.com/?id=%22'
+      expect(c('igvita.com?id=<>')).to eq('http://igvita.com/?id=%3C%3E')
+      expect(c('igvita.com?id="')).to eq('http://igvita.com/?id=%22')
-      c('test.tumblr.com/post/23223/text-stub').should == 'http://test.tumblr.com/post/23223'
+      expect(c('test.tumblr.com/post/23223/text-stub')).to eq('http://test.tumblr.com/post/23223')
     end
-    it "should clean host specific parameters" do
+    it "cleans host specific parameters" do
       YAML.load_file('spec/c14n_hosts.yml').each do |orig, clean|
-        c(orig).should == clean
+        expect(c(orig)).to eq(clean)
+      end
+    end
+    context "reserved characters" do
+      it "preserves encoded question marks" do
+        expect(c('http://en.wikipedia.org/wiki/Whose_Line_Is_It_Anyway%3F_%28U.S._TV_series%29')).
+          to eq('http://en.wikipedia.org/wiki/Whose_Line_Is_It_Anyway%3F_(U.S._TV_series)')
+      end
+      it "preserves encoded ampersands" do
+        expect(c('http://example.com/?foo=BAR%26BAZ')).
+          to eq('http://example.com/?foo=BAR%26BAZ')
+      end
+      it "preserves consecutive reserved characters" do
+        expect(c('http://example.com/so-quizical%3F%3F%3F?foo=bar')).
+          to eq('http://example.com/so-quizical%3F%3F%3F?foo=bar')
       end
     end
   end
@@ -200,20 +215,20 @@ describe PostRank::URI do
       PostRank::URI.hash(uri, opts)
     end
-    it "should compute the MD5 hash without cleaning the URI" do
+    it "computes the MD5 hash without cleaning the URI" do
       hash = '55fae8910d312b7878a3201ed653b881'
-      h('http://everburning.com/feed/post/1').should == hash
-      h('everburning.com/feed/post/1').should_not == hash
+      expect(h('http://everburning.com/feed/post/1')).to eq(hash)
+      expect(h('everburning.com/feed/post/1')).not_to eq(hash)
     end
-    it "should normalize the URI if requested and compute MD5 hash" do
+    it "normalizes the URI if requested and compute MD5 hash" do
       hash = '55fae8910d312b7878a3201ed653b881'
-      h('http://EverBurning.Com/feed/post/1', :clean => true).should == hash
-      h('Everburning.com/feed/post/1', :clean => true).should == hash
-      h('everburning.com/feed/post/1', :clean => true).should == hash
-      h('everburning.com/feed/post/1/', :clean => true).should == hash
+      expect(h('http://EverBurning.Com/feed/post/1', :clean => true)).to eq(hash)
+      expect(h('Everburning.com/feed/post/1', :clean => true)).to eq(hash)
+      expect(h('everburning.com/feed/post/1', :clean => true)).to eq(hash)
+      expect(h('everburning.com/feed/post/1/', :clean => true)).to eq(hash)
     end
   end
@@ -223,81 +238,81 @@ describe PostRank::URI do
     end
     context "TLDs" do
-      it "should not pick up bad grammar as a domain name and think it has a link" do
-        e("yah.lets").should be_empty
+      it "does not pick up bad grammar as a domain name and think it has a link" do
+        expect(e("yah.lets")).to be_empty
       end
-      it "should not pickup bad TLDS" do
-        e('stuff.zz a.b.c d.zq').should be_empty
+      it "does not pickup bad TLDS" do
+        expect(e('stuff.zz a.b.c d.zq')).to be_empty
       end
     end
-    it "should extract twitter links with hashbangs" do
-      e('test http://twitter.com/#!/igrigorik').should include('http://twitter.com/igrigorik')
+    it "extracts twitter links with hashbangs" do
+      expect(e('test http://twitter.com/#!/igrigorik')).to include('http://twitter.com/igrigorik')
     end
-    it "should extract mobile twitter links with hashbangs" do
-      e('test http://mobile.twitter.com/#!/_mm6').should include('http://mobile.twitter.com/_mm6')
+    it "extracts mobile twitter links with hashbangs" do
+      expect(e('test http://mobile.twitter.com/#!/_mm6')).to include('http://mobile.twitter.com/_mm6')
     end
-    it "should handle a URL that comes after text without a space" do
-      e("text:http://spn.tw/tfnLT").should include("http://spn.tw/tfnLT")
-      e("text;http://spn.tw/tfnLT").should include("http://spn.tw/tfnLT")
-      e("text.http://spn.tw/tfnLT").should include("http://spn.tw/tfnLT")
-      e("text-http://spn.tw/tfnLT").should include("http://spn.tw/tfnLT")
+    it "handles a URL that comes after text without a space" do
+      expect(e("text:http://spn.tw/tfnLT")).to include("http://spn.tw/tfnLT")
+      expect(e("text;http://spn.tw/tfnLT")).to include("http://spn.tw/tfnLT")
+      expect(e("text.http://spn.tw/tfnLT")).to include("http://spn.tw/tfnLT")
+      expect(e("text-http://spn.tw/tfnLT")).to include("http://spn.tw/tfnLT")
     end
-    it "should not pick up anything on or after the first . in the path of a URL with a shortener domain" do
-      e("http://bit.ly/9cJ2mz......if ur pickin up anythign here, u FAIL.").should == ["http://bit.ly/9cJ2mz"]
+    it "does not pick up anything on or after the first . in the path of a URL with a shortener domain" do
+      expect(e("http://bit.ly/9cJ2mz......if ur pickin up anythign here, u FAIL.")).to eq(["http://bit.ly/9cJ2mz"])
     end
-    it "should pickup urls without protocol" do
+    it "picks up urls without protocol" do
       u = e('abc.com abc.co')
-      u.should include('http://abc.com/')
-      u.should include('http://abc.co/')
+      expect(u).to include('http://abc.com/')
+      expect(u).to include('http://abc.co/')
     end
-    it "should pickup urls inside tags" do
+    it "picks up urls inside tags" do
       u = e("<a href='http://bit.ly/3fds3'>abc.com</a>")
-      u.should include('http://abc.com/')
+      expect(u).to include('http://abc.com/')
     end
     context "multibyte characters" do
-      it "should stop extracting URLs at the full-width CJK space character" do
-        e("http://www.youtube.com/watch?v=w_j4Lda25jA　　とんかつ定食").should == ["http://www.youtube.com/watch?v=w_j4Lda25jA"]
+      it "stops extracting URLs at the full-width CJK space character" do
+        expect(e("http://www.youtube.com/watch?v=w_j4Lda25jA　　とんかつ定食")).to eq(["http://www.youtube.com/watch?v=w_j4Lda25jA"])
       end
     end
   end
   context "href extract" do
-    it "should extract links from html text" do
+    it "extracts links from html text" do
       g,b = PostRank::URI.extract_href("<a href='google.com'>link to google</a> with text <a href='b.com'>stuff</a>")
-      g.first.should == 'http://google.com/'
-      b.first.should == 'http://b.com/'
+      expect(g.first).to eq('http://google.com/')
+      expect(b.first).to eq('http://b.com/')
-      g.last.should == 'link to google'
-      b.last.should == 'stuff'
+      expect(g.last).to eq('link to google')
+      expect(b.last).to eq('stuff')
     end
-    it "should handle empty hrefs" do
-      lambda do
+    it "handles empty hrefs" do
+      expect do
         l = PostRank::URI.extract_href("<a>link to google</a> with text <a href=''>stuff</a>")
-        l.should be_empty
-      end.should_not raise_error
+        expect(l).to be_empty
+      end.not_to raise_error
     end
     context "relative paths" do
-      it "should reject relative paths" do
+      it "rejects relative paths" do
         l = PostRank::URI.extract_href("<a href='/stuff'>link to stuff</a>")
-        l.should be_empty
+        expect(l).to be_empty
       end
-      it "should resolve relative paths if host is provided" do
+      it "resolves relative paths if host is provided" do
         i = PostRank::URI.extract_href("<a href='/stuff'>link to stuff</a>", "igvita.com").first
-        i.first.should == 'http://igvita.com/stuff'
-        i.last.should == 'link to stuff'
+        expect(i.first).to eq('http://igvita.com/stuff')
+        expect(i.last).to eq('link to stuff')
       end
     end
@@ -322,51 +337,51 @@ describe PostRank::URI do
       }
       url_list.each_pair do |url, expected_result|
-        it "should extract #{expected_result.inspect} from #{url}" do
+        it "extracts #{expected_result.inspect} from #{url}" do
           u = PostRank::URI.clean(url, :raw => true)
-          u.domain.should == expected_result
+          expect(u.domain).to eq(expected_result)
         end
       end
     end
   end
   context "parse" do
-    it 'should not fail on large host-part look-alikes' do
-      PostRank::URI.parse('a'*64+'.ca').host.should == nil
+    it 'does not fail on large host-part look-alikes' do
+      expect(PostRank::URI.parse('a'*64+'.ca').host).to eq(nil)
     end
-    it 'should not pancake javascript scheme URIs' do
-      PostRank::URI.parse('javascript:void(0);').scheme.should == 'javascript'
+    it 'does not pancake javascript scheme URIs' do
+      expect(PostRank::URI.parse('javascript:void(0);').scheme).to eq('javascript')
     end
-    it 'should not pancake mailto scheme URIs' do
-      PostRank::URI.parse('mailto:void(0);').scheme.should == 'mailto'
+    it 'does not pancake mailto scheme URIs' do
+      expect(PostRank::URI.parse('mailto:void(0);').scheme).to eq('mailto')
     end
-    it 'should not pancake xmpp scheme URIs' do
-      PostRank::URI.parse('xmpp:void(0);').scheme.should == 'xmpp'
+    it 'does not pancake xmpp scheme URIs' do
+      expect(PostRank::URI.parse('xmpp:void(0);').scheme).to eq('xmpp')
     end
   end
   context 'valid?' do
     it 'marks incomplete URI string as invalid' do
-      PostRank::URI.valid?('/path/page.html').should be_false
+      expect(PostRank::URI.valid?('/path/page.html')).to be false
     end
     it 'marks www.test.c as invalid' do
-      PostRank::URI.valid?('http://www.test.c').should be_false
+      expect(PostRank::URI.valid?('http://www.test.c')).to be false
     end
     it 'marks www.test.com as valid' do
-      PostRank::URI.valid?('http://www.test.com').should be_true
+      expect(PostRank::URI.valid?('http://www.test.com')).to be true
     end
     it 'marks Unicode domain as valid (NOTE: works only with a scheme)' do
-      PostRank::URI.valid?('http://президент.рф').should be_true
+      expect(PostRank::URI.valid?('http://президент.рф')).to be true
     end
     it 'marks punycode domain domain as valid' do
-      PostRank::URI.valid?('xn--d1abbgf6aiiy.xn--p1ai').should be_true
+      expect(PostRank::URI.valid?('xn--d1abbgf6aiiy.xn--p1ai')).to be true
     end
   end
 end

metadata CHANGED

@@ -1,71 +1,123 @@
 --- !ruby/object:Gem::Specification
 name: postrank-uri
 version: !ruby/object:Gem::Version
-  version: 1.0.18
+  version: 1.0.20
 platform: ruby
 authors:
 - Ilya Grigorik
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-04-10 00:00:00.000000000 Z
+date: 2017-03-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: addressable
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - ~>
+    - - ">="
       - !ruby/object:Gem::Version
         version: 2.3.0
+    - - "<"
+      - !ruby/object:Gem::Version
+        version: '2.6'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - ~>
+    - - ">="
       - !ruby/object:Gem::Version
         version: 2.3.0
+    - - "<"
+      - !ruby/object:Gem::Version
+        version: '2.6'
 - !ruby/object:Gem::Dependency
   name: public_suffix
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - ~>
+    - - ">="
       - !ruby/object:Gem::Version
-        version: 1.1.3
+        version: 2.0.0
+    - - "<"
+      - !ruby/object:Gem::Version
+        version: '2.1'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - ~>
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 2.0.0
+    - - "<"
       - !ruby/object:Gem::Version
-        version: 1.1.3
+        version: '2.1'
 - !ruby/object:Gem::Dependency
   name: nokogiri
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - ~>
+    - - ">="
       - !ruby/object:Gem::Version
         version: 1.6.1
+    - - "<"
+      - !ruby/object:Gem::Version
+        version: '1.8'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - ~>
+    - - ">="
       - !ruby/object:Gem::Version
         version: 1.6.1
+    - - "<"
+      - !ruby/object:Gem::Version
+        version: '1.8'
+- !ruby/object:Gem::Dependency
+  name: rake
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: rspec
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - '>='
+    - - ">="
       - !ruby/object:Gem::Version
         version: '0'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - '>='
+    - - ">="
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: appraisal
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 2.0.0
+    - - "<"
+      - !ruby/object:Gem::Version
+        version: '3.0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 2.0.0
+    - - "<"
+      - !ruby/object:Gem::Version
+        version: '3.0'
 description: URI normalization, c14n, escaping, and extraction
 email:
 - ilya@igvita.com
@@ -73,9 +125,11 @@ executables: []
 extensions: []
 extra_rdoc_files: []
 files:
-- .gitignore
-- .rspec
+- ".gitignore"
+- ".rspec"
+- Appraisals
 - Gemfile
+- LICENSE
 - README.md
 - Rakefile
 - lib/postrank-uri.rb
@@ -86,7 +140,8 @@ files:
 - spec/helper.rb
 - spec/postrank-uri_spec.rb
 homepage: http://github.com/postrank-labs/postrank-uri
-licenses: []
+licenses:
+- MIT
 metadata: {}
 post_install_message:
 rdoc_options: []
@@ -94,17 +149,17 @@ require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
-  - - '>='
+  - - ">="
     - !ruby/object:Gem::Version
       version: '0'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
-  - - '>='
+  - - ">="
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
 rubyforge_project: postrank-uri
-rubygems_version: 2.0.6
+rubygems_version: 2.6.8
 signing_key:
 specification_version: 4
 summary: URI normalization, c14n, escaping, and extraction
@@ -112,4 +167,3 @@ test_files:
 - spec/c14n_hosts.yml
 - spec/helper.rb
 - spec/postrank-uri_spec.rb
-has_rdoc: