RubyGems - postrank-uri - Versions diffs - 1.0.0 → 1.0.24 - Mend

postrank-uri 1.0.0 → 1.0.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

checksums.yaml +7 -0
data/.gitignore +3 -0
data/.rspec +0 -0
data/.travis.yml +15 -0
data/Appraisals +15 -0
data/LICENSE +21 -0
data/README.md +32 -5
data/Rakefile +3 -4
data/lib/postrank-uri/{c18n.yml → c14n.yml} +20 -1
data/lib/postrank-uri/version.rb +1 -1
data/lib/postrank-uri.rb +148 -34
data/postrank-uri.gemspec +10 -4
data/spec/{c18n_hosts.yml → c14n_hosts.yml} +30 -3
data/spec/helper.rb +4 -1
data/spec/postrank-uri_spec.rb +287 -82
metadata +119 -75

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 8ea2cc7f1dc58cb559b9168ff0f83150f1ec6119
+  data.tar.gz: 73d97d1f7c56b4b0644eb9a8ad54490ca1561fbf
+SHA512:
+  metadata.gz: 1fa2d5475a617ab8181554f4d15f4d72a1c67f40bb60c16502a575e16de6d721edcbe0c6ca1e5c331588510e0dd1cced56ea9dd4704dc7e9ab59b71c6a6385a5
+  data.tar.gz: 6ab0bf3e698d99127db88528a8fefba1b4d4c7667a6c42cc256a71dadb2dd78dd1f080e206312cc395b9a9c3c68b1fb62335c2f6a48346871cd85466ac86660b

data/.gitignore ADDED Viewed

@@ -0,0 +1,3 @@
+pkg
+Gemfile.lock
+/gemfiles

data/.rspec ADDED Viewed

File without changes

data/.travis.yml ADDED Viewed

@@ -0,0 +1,15 @@
+language: ruby
+cache: bundler
+rvm:
+  - 2.3.8
+  - 2.4.5
+  - 2.5.3
+  - 2.6.1
+before_install:
+  - gem install bundler
+install:
+  - bundle install --jobs=3 --retry=3
+  - bundle exec appraisal install
+script:
+  - bundle exec rake
+  - bundle exec rake appraisal

data/Appraisals ADDED Viewed

@@ -0,0 +1,15 @@
+nokogiri_versions = ["1.8", "1.9", "1.10"]
+nokogiri_versions.each do |version|
+  appraise "nokogiri-#{version}" do
+    gem "nokogiri", "~> #{version}.0"
+  end
+end
+addressable_versions = ["2.4", "2.5", "2.6"]
+addressable_versions.each do |version|
+  appraise "addressable-#{version}" do
+    gem "addressable", "~> #{version}.0"
+  end
+end

data/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+The MIT License (MIT)
+Copyright (c) 2011 Ilya Grigorik
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

data/README.md CHANGED Viewed

@@ -1,6 +1,8 @@
 # PostRank URI
-A collection of convenience methods (Ruby 1.8 & Ruby 1.9) for dealing with extracting, (un)escaping, normalization, and canonicalization of URIs. At PostRank we process over 20M URI associated activities each day, and we need to make sure that we can reliably extract the URIs from a variety of text formats, deal with all the numerous and creative ways users like to escape and unescape their URIs, normalize the resulting URIs, and finally apply a set of custom canonicalization rules to make sure that we can cross-reference when the users are talking about the same URL.
+[![Gem Version](https://badge.fury.io/rb/postrank-uri.svg)](https://rubygems.org/gems/postrank-uri) [![Build Status](https://travis-ci.org/postrank-labs/postrank-uri.svg?branch=master)](https://travis-ci.org/postrank-labs/postrank-uri)
+A collection of convenience methods (Ruby 2.3+) for dealing with extracting, (un)escaping, normalization, and canonicalization of URIs. At PostRank we process over 20M URI associated activities each day, and we need to make sure that we can reliably extract the URIs from a variety of text formats, deal with all the numerous and creative ways users like to escape and unescape their URIs, normalize the resulting URIs, and finally apply a set of custom canonicalization rules to make sure that we can cross-reference when the users are talking about the same URL.
 In a nutshell, we need to make sure that creative cases like the ones below all resolve to same URI:
@@ -14,7 +16,7 @@ In a nutshell, we need to make sure that creative cases like the ones below all
 ## API
 - **PostRank::URI.extract(text)** - Detect URIs in text, discard bad TLD's
-- **PostRank::URI.clean(uri)** - Unescape, normalize, apply c18n filters - 95% use case.
+- **PostRank::URI.clean(uri)** - Unescape, normalize, apply c14n filters - 95% use case.
 - **PostRank::URI.normalize(uri)** - Apply RFC normalization rules, discard extra path characters, drop anchors
 - **PostRank::URI.unescape(uri)** - Unescape URI entities, handle +/%20's, etc
@@ -33,8 +35,33 @@ In a nutshell, we need to make sure that creative cases like the ones below all
         [0] "http://link.to/?a=b"
     ]
-## C18N
+## C14N
+As part of URI canonicalization the library will remove common tracking parameters from Google Analytics and several other providers. Beyond that, host-specific rules are also applied. For example, nytimes.com likes to add a 'partner' query parameter for tracking purposes, which has no effect on the content - hence, it is removed from the URI. For the full list, see the c14n.yml file.
+Detecting "duplicate URLs" is a hard problem to solve (expensive in all senses); instead, we are compiling a manually assembled database. If you find cases which are missing, please do report them, or send us a pull request!
+## Development
+### Setup
+```
+bundle install
+```
+### Running tests
+```
+bundle exec rake
+```
+### Running dependency appraisals
+To verify `postrank-uri` works with different versions of its runtime dependencies you can run:
-As part of URI canonicalization the library will remove common tracking parameters from Google Analytics and several other providers. Beyond that, host-specific rules are also applied. For example, nytimes.com likes to add a 'partner' query parameter for tracking purposes, but which has no effect on the content - hence, it is removed from the URI. For full list, see the c18n.yml file.
+```
+bundle exec appraisal install
+bundle exec rake appraisal
+```
-Detecting "duplicate URLs" is a hard problem to solve (expensive in all senses), instead we are compiling a manually assembled database. If you find cases which are missing, please do report them, or send us a pull request!
+This will execute the test suite with different versions of the dependencies.

data/Rakefile CHANGED Viewed

@@ -1,9 +1,8 @@
 require 'bundler'
 Bundler::GemHelper.install_tasks
-require 'rspec'
 require 'rspec/core/rake_task'
+RSpec::Core::RakeTask.new(:spec)
+task :default => :spec
-Rspec::Core::RakeTask.new do |t|
-  t.rspec_opts = '--color'
-end
+require 'appraisal'

data/lib/postrank-uri/{c18n.yml → c14n.yml} RENAMED Viewed

@@ -7,12 +7,19 @@
 - utm_campaign  # Google Analytics: campaign name
 - sms_ss        # addthis.com tracker
 - awesm         # awe.sm tracker
+- xtor          # AT Internet tracker
+- PHPSESSID     # Legacy PHP session identifier
 :hosts:
   nytimes.com:
   - partner
+  - pagewanted
   - emc
   - _r
+  - ref
+  - src
+  diepresse.com:
+  - _vl_backlink
   washingtonpost.com:
   - nav
   - wprss
@@ -34,4 +41,16 @@
   welt.de:
   - wtmc
   usatoday.com:
-  - csp
+  - csp
+  cnet.com:
+  - part
+  - subj
+  - tag
+  wsj.com:
+  - mod
+  allthingsd.com:
+  - mod
+  waomarketing.com:
+  - nucrss
+  youtube.com:
+  - feature

data/lib/postrank-uri/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 module PostRank
   module URI
-    VERSION = "1.0.0"
+    VERSION = "1.0.24"
   end
 end

data/lib/postrank-uri.rb CHANGED Viewed

@@ -1,22 +1,48 @@
-# -*- encoding: utf-8 -*-
+# encoding: utf-8
 require 'addressable/uri'
-require 'domainatrix'
+require 'digest/md5'
+require 'nokogiri'
+require 'public_suffix'
 require 'yaml'
+module Addressable
+  class URI
+    def domain
+      host = self.host
+      (host && PublicSuffix.valid?(host, default_rule: nil)) ? PublicSuffix.parse(host).domain : nil
+    end
+    def normalized_query
+      @normalized_query ||= (begin
+        if self.query && self.query.strip != ''
+          (self.query.strip.split("&", -1).map do |pair|
+            Addressable::URI.normalize_component(
+              pair,
+              Addressable::URI::CharacterClasses::QUERY.sub("\\&", "")
+            )
+          end).join("&")
+        else
+          nil
+        end
+      end)
+    end
+  end
+end
 module PostRank
   module URI
-    c18ndb = YAML.load_file(File.dirname(__FILE__) + '/postrank-uri/c18n.yml')
+    c14ndb = YAML.load_file(File.dirname(__FILE__) + '/postrank-uri/c14n.yml')
-    C18N = {}
-    C18N[:global] = c18ndb[:all].freeze
-    C18N[:hosts]  = c18ndb[:hosts].inject({}) {|h,(k,v)| h[/#{Regexp.escape(k)}$/.freeze] = v; h}
+    C14N = {}
+    C14N[:global] = c14ndb[:all].freeze
+    C14N[:hosts]  = c14ndb[:hosts].inject({}) {|h,(k,v)| h[/#{Regexp.escape(k)}$/.freeze] = v; h}
     URIREGEX = {}
     URIREGEX[:protocol] = /https?:\/\//i
     URIREGEX[:valid_preceding_chars] = /(?:|\.|[^-\/"':!=A-Z0-9_@＠]|^|\:)/i
-    URIREGEX[:valid_domain] = /(?:[^[:punct:]\s][\.-](?=[^[:punct:]\s])|[^[:punct:]\s]){1,}\.[a-z]{2,}(?::[0-9]+)?/i
+    URIREGEX[:valid_domain] = /\b(?:[a-z0-9-]{1,63}\.){1,}[a-z]{2,63}(?::[0-9]+)?/i
     URIREGEX[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\$\/%#\[\]\-_~]/i
     # Allow URL paths to contain balanced parens
@@ -60,67 +86,155 @@ module PostRank
           )
         }iox;
+    URIREGEX[:reserved_characters] = /%3F|%26/i
     URIREGEX[:escape]   = /([^ a-zA-Z0-9_.-]+)/x
-    URIREGEX[:unescape] = /((?:%[0-9a-fA-F]{2})+)/x
+    URIREGEX[:unescape] = /(%[0-9a-fA-F]{2})/x
+    URIREGEX[:double_slash_outside_scheme] = /(?<!http:|https:)\/{2}/x
     URIREGEX.each_pair{|k,v| v.freeze }
-    def self.extract(text)
+    module_function
+    def extract(text)
       return [] if !text
       urls = []
       text.to_s.scan(URIREGEX[:valid_url]) do |all, before, url, protocol, domain, path, query|
-        begin
-          url = clean(url).to_s
-          Domainatrix.parse(url)
-          urls.push url
-        rescue NoMethodError
+        # Only extract the URL if the domain is valid
+        if PublicSuffix.valid?(domain, default_rule: nil)
+          url = clean(url)
+          urls.push url.to_s
         end
       end
       urls.compact
     end
-    def self.escape(uri)
+    def extract_href(text, host = nil)
+      urls = []
+      Nokogiri.HTML(text).search('a').each do |a|
+        begin
+          url = clean(a.attr('href'), :raw => true, :host => host)
+          next unless url.absolute?
+          urls.push [url.to_s, a.text]
+        rescue
+          next
+        end
+      end
+      urls
+    end
+    def escape(uri)
       uri.gsub(URIREGEX[:escape]) do
         '%' + $1.unpack('H2' * $1.size).join('%').upcase
       end.gsub(' ','%20')
     end
-    def self.unescape(uri)
-      uri.tr('+', ' ').gsub(URIREGEX[:unescape]) do
-        [$1.delete('%')].pack('H*')
+    def unescape(uri)
+      u = parse(uri)
+      u.query = u.query.tr('+', ' ') if u.query
+      u.to_s.gsub(URIREGEX[:unescape]) do |encoded|
+        if !encoded.match(URIREGEX[:reserved_characters]).nil?
+          encoded
+        else
+          [encoded.delete('%')].pack('H*')
+        end
       end
     end
-    def self.clean(uri)
-      normalize(c18n(unescape(uri))).to_s
+    def clean(uri, opts = {})
+      uri = normalize(c14n(unescape(uri), opts))
+      opts[:raw] ? uri : uri.to_s
     end
-    def self.normalize(uri)
-      u = parse(uri)
-      u.path = u.path.squeeze('/')
+    def hash(uri, opts = {})
+      Digest::MD5.hexdigest(opts[:clean] == true ? clean(uri) : uri)
+    end
+    def normalize(uri, opts = {})
+      u = parse(uri, opts)
+      u.path = u.path.gsub(URIREGEX[:double_slash_outside_scheme], '/')
+      u.path = u.path.chomp('/') if u.path.size != 1
       u.query = nil if u.query && u.query.empty?
       u.fragment = nil
       u
     end
-    def self.c18n(uri)
-      u = parse(uri)
+    def c14n(uri, opts = {})
+      u = parse(uri, opts)
+      u = embedded(u)
-      if q = u.query_values(:notation => :flat_array)
-        q.delete_if { |k,v| C18N[:global].include?(k) }
-        q.delete_if { |k,v| C18N[:hosts].find {|r,p| u.host =~ r && p.include?(k) } }
+      if q = u.query_values(Array)
+        q.delete_if { |k,v| C14N[:global].include?(k) }
+        q.delete_if { |k,v| C14N[:hosts].find {|r,p| u.host =~ r && p.include?(k) } }
       end
       u.query_values = q
+      if u.host =~ /^(mobile\.)?twitter\.com$/ && u.fragment && u.fragment.match(/^!(.*)/)
+        u.fragment = nil
+        u.path = $1
+      end
+      if u.host =~ /tumblr\.com$/ && u.path =~ /\/post\/\d+\//
+        u.path = u.path.gsub(/[^\/]+$/, '')
+      end
       u
     end
-    def self.parse(uri)
+    def embedded(uri)
+      embedded = if uri.host == 'news.google.com' && uri.path == '/news/url' \
+         || uri.host == 'xfruits.com'
+        uri.query_values['url']
+      elsif uri.host =~ /myspace\.com/ && uri.path =~ /PostTo/
+        embedded = uri.query_values['u']
+      end
+      uri = clean(embedded, :raw => true) if embedded
+      uri
+    end
+    def parse(uri, opts = {})
       return uri if uri.is_a? Addressable::URI
-      uri = uri.index(URIREGEX[:protocol]) == 0 ? uri : "http://#{uri}"
-      Addressable::URI.parse(uri).normalize
+      uri = Addressable::URI.parse(uri)
+      if !uri.host && uri.scheme !~ /^javascript|mailto|xmpp$/
+        if uri.scheme
+          # With no host and scheme yes, the parser exploded
+          return parse("http://#{uri}", opts)
+        end
+        if opts[:host]
+          uri.host = opts[:host]
+        else
+          parts = uri.path.to_s.split(/[\/:]/)
+          if parts.first =~ URIREGEX[:valid_domain]
+            host = parts.shift
+            uri.path = '/' + parts.join('/')
+            uri.host = host
+          end
+        end
+      end
+      uri.scheme = 'http' if uri.host && !uri.scheme
+      uri.normalize!
     end
+    def valid?(uri)
+      # URI is only valid if it is not nil, parses cleanly as a URI,
+      # and the domain has a recognized, valid TLD component
+      return false if uri.nil?
+      is_valid = false
+      cleaned_uri = clean(uri, :raw => true)
+      if host = cleaned_uri.host
+        is_valid = PublicSuffix.valid?(Addressable::IDNA.to_unicode(host), default_rule: nil)
+      end
+      is_valid
+    end
   end
-end
+end

data/postrank-uri.gemspec CHANGED Viewed

@@ -8,15 +8,21 @@ Gem::Specification.new do |s|
   s.platform    = Gem::Platform::RUBY
   s.authors     = ["Ilya Grigorik"]
   s.email       = ["ilya@igvita.com"]
-  s.homepage    = "http://rubygems.org/gems/postrank-uri"
-  s.summary     = "URI normalization, c18n, escaping, and extraction"
+  s.homepage    = "http://github.com/postrank-labs/postrank-uri"
+  s.summary     = "URI normalization, c14n, escaping, and extraction"
   s.description = s.summary
+  s.license     = 'MIT'
+  s.required_ruby_version  = ">= 2.3.0"
   s.rubyforge_project = "postrank-uri"
-  s.add_dependency "addressable"
-  s.add_dependency "domainatrix"
+  s.add_dependency "addressable",   ">= 2.4.0"
+  s.add_dependency "public_suffix", ">= 2.0.0", "< 2.1"
+  s.add_dependency "nokogiri",      ">= 1.8.0"
+  s.add_development_dependency "rake"
   s.add_development_dependency "rspec"
+  s.add_development_dependency "appraisal", ">= 2.0.0", "< 3.0"
   s.files         = `git ls-files`.split("\n")
   s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")

data/spec/{c18n_hosts.yml → c14n_hosts.yml} RENAMED Viewed

@@ -2,8 +2,26 @@
 - - http://www.nytimes.com/2010/12/16/world/europe/16russia.html?_r=1&partner=rss&emc=rss
   - http://www.nytimes.com/2010/12/16/world/europe/16russia.html
+- - http://www.nytimes.com/2011/02/20/magazine/20FOB-Medium-t.html?ref=magazine
+  - http://www.nytimes.com/2011/02/20/magazine/20FOB-Medium-t.html
+- - http://www.nytimes.com/2011/03/13/business/13hire.html?pagewanted=1&_r=1&ref=technology
+  - http://www.nytimes.com/2011/03/13/business/13hire.html
+- - http://www.nytimes.com/2011/03/15/business/media/15adco.html?_r=2&src=recg
+  - http://www.nytimes.com/2011/03/15/business/media/15adco.html
+- - http://networkeffect.allthingsd.com/20110308/googles-approach-to-social/?mod=tweet
+  - http://networkeffect.allthingsd.com/20110308/googles-approach-to-social
+- - http://online.wsj.com/article/SB10001424052748704657704576150191661959856.html?mod=WSJ_hp_LEFTWhatsNewsCollection
+  - http://online.wsj.com/article/SB10001424052748704657704576150191661959856.html
+- - http://diepresse.com/home/wirtschaft/636448/Griechenland_Drachme-als-letzte-Rettung?_vl_backlink=%2Fhome
+  - http://diepresse.com/home/wirtschaft/636448/Griechenland_Drachme-als-letzte-Rettung
 - - http://dotearth.blogs.nytimes.com/2010/12/14/beyond-political-science/?partner=rss&emc=rss
-  - http://dotearth.blogs.nytimes.com/2010/12/14/beyond-political-science/
+  - http://dotearth.blogs.nytimes.com/2010/12/14/beyond-political-science
 - - http://www.washingtonpost.com/wp-dyn/content/article/2010/12/14/AR2010121406045.html?nav=rss_email/components
   - http://www.washingtonpost.com/wp-dyn/content/article/2010/12/14/AR2010121406045.html
@@ -30,10 +48,19 @@
   - http://www.dw-world.de/dw/article/0,,6330472,00.html
 - - http://www.repubblica.it/rubriche/il-caso-del-giorno/2010/12/13/news/riscossa_aeffe-10153565/?rss
-  - http://www.repubblica.it/rubriche/il-caso-del-giorno/2010/12/13/news/riscossa_aeffe-10153565/
+  - http://www.repubblica.it/rubriche/il-caso-del-giorno/2010/12/13/news/riscossa_aeffe-10153565
 - - http://www.welt.de/sport/Der-Hoellenritt-des-Fussball-Profis-Jean-Marc-Bosman.html?wtmc=RSS.Sport.Fussball
   - http://www.welt.de/sport/Der-Hoellenritt-des-Fussball-Profis-Jean-Marc-Bosman.html
 - - http://www.usatoday.com/life/television/news/2011-01-19-race19_ST_N.htm?csp=34life
-  - http://www.usatoday.com/life/television/news/2011-01-19-race19_ST_N.htm
+  - http://www.usatoday.com/life/television/news/2011-01-19-race19_ST_N.htm
+- - http://news.cnet.com/8301-17938_105-20029409-1.html?part=rss&subj=news&tag=2547-1_3-0-20
+  - http://news.cnet.com/8301-17938_105-20029409-1.html
+- - http://www.waomarketing.com/blog/at-internet-white-paper-series/?nucrss=1
+  - http://www.waomarketing.com/blog/at-internet-white-paper-series
+- - http://www.youtube.com/watch?v=RRBoPveyETc&feature=player_embedded
+  - http://www.youtube.com/watch?v=RRBoPveyETc

data/spec/helper.rb CHANGED Viewed

@@ -1,4 +1,7 @@
 require 'bundler'
 Bundler.setup
-require 'lib/postrank-uri'
+$LOAD_PATH.unshift(File.dirname(__FILE__))
+$LOAD_PATH.unshift(File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib')), __FILE__)
+require 'postrank-uri'

data/spec/postrank-uri_spec.rb CHANGED Viewed

@@ -3,145 +3,237 @@
 require 'helper'
 describe PostRank::URI do
-  let(:igvita) { 'http://igvita.com/' }
   context "escaping" do
-    it "should escape PostRank::URI string" do
-      PostRank::URI.escape('id=1').should == 'id%3D1'
+    it "escapes PostRank::URI string" do
+      expect(PostRank::URI.escape('id=1')).to eq('id%3D1')
     end
-    it "should escape spaces as %20's" do
-      PostRank::URI.escape('id= 1').should match('%20')
+    it "escapes spaces as %20's" do
+      expect(PostRank::URI.escape('id= 1')).to match('%20')
     end
   end
   context "unescape" do
-    it "should unescape PostRank::URI" do
-      PostRank::URI.unescape(PostRank::URI.escape('id=1')).should == 'id=1'
+    it "unescapes PostRank::URI" do
+      expect(PostRank::URI.unescape(PostRank::URI.escape('id=1'))).to eq('id=1')
     end
-    it "should unescape PostRank::URI with spaces" do
-      PostRank::URI.unescape(PostRank::URI.escape('id= 1')).should == 'id= 1'
+    it "unescapes PostRank::URI with spaces" do
+      expect(PostRank::URI.unescape(PostRank::URI.escape('id= 1'))).to eq('id= 1')
     end
     context "accept improperly escaped PostRank::URI strings" do
       # See http://tools.ietf.org/html/rfc3986#section-2.3
-      it "should unescape PostRank::URI with spaces encoded as '+'" do
-        PostRank::URI.unescape('id=+1').should == 'id= 1'
+      it "unescapes PostRank::URI with spaces encoded as '+'" do
+        expect(PostRank::URI.unescape('?id=+1')).to eq('?id= 1')
+      end
+      it "unescapes PostRank::URI with spaces encoded as '+'" do
+        expect(PostRank::URI.unescape('?id%3D+1')).to eq('?id= 1')
       end
-      it "should unescape PostRank::URI with spaces encoded as '+'" do
-        PostRank::URI.unescape('id%3D+1').should == 'id= 1'
+      it "unescapes PostRank::URI with spaces encoded as %20" do
+        expect(PostRank::URI.unescape('?id=%201')).to eq('?id= 1')
       end
-      it "should unescape PostRank::URI with spaces encoded as %20" do
-        PostRank::URI.unescape('id=%201').should == 'id= 1'
+      it "does not unescape '+' to spaces in paths" do
+        expect(PostRank::URI.unescape('/foo+bar?id=foo+bar')).to eq('/foo+bar?id=foo bar')
       end
     end
   end
   context "normalize" do
+    let(:igvita) { 'http://igvita.com/' }
     def n(uri)
       PostRank::URI.normalize(uri).to_s
     end
-    it "should normalize paths in PostRank::URIs" do
-      n('http://igvita.com/').should == igvita
-      n('http://igvita.com').to_s.should == igvita
-      n('http://igvita.com///').should == igvita
+    it "normalizes paths in PostRank::URIs" do
+      expect(n('http://igvita.com/')).to eq(igvita)
+      expect(n('http://igvita.com').to_s).to eq(igvita)
+      expect(n('http://igvita.com///')).to eq(igvita)
-      n('http://igvita.com/../').should == igvita
-      n('http://igvita.com/a/b/../../').should == igvita
-      n('http://igvita.com/a/b/../..').should == igvita
+      expect(n('http://igvita.com/../')).to eq(igvita)
+      expect(n('http://igvita.com/a/b/../../')).to eq(igvita)
+      expect(n('http://igvita.com/a/b/../..')).to eq(igvita)
     end
-    it "should normalize query strings in PostRank::URIs" do
-      n('http://igvita.com/?').should == igvita
-      n('http://igvita.com?').should == igvita
-      n('http://igvita.com/a/../?').should == igvita
+    it "normalizes query strings in PostRank::URIs" do
+      expect(n('http://igvita.com/?')).to eq(igvita)
+      expect(n('http://igvita.com?')).to eq(igvita)
+      expect(n('http://igvita.com/a/../?')).to eq(igvita)
     end
-    it "should normalize anchors in PostRank::URIs" do
-      n('http://igvita.com#test').should == igvita
-      n('http://igvita.com#test#test').should == igvita
-      n('http://igvita.com/a/../?#test').should == igvita
+    it "normalizes anchors in PostRank::URIs" do
+      expect(n('http://igvita.com#test')).to eq(igvita)
+      expect(n('http://igvita.com#test#test')).to eq(igvita)
+      expect(n('http://igvita.com/a/../?#test')).to eq(igvita)
     end
-    it "should clean whitespace in PostRank::URIs" do
-      n('http://igvita.com/a/../?  ').should == igvita
-      n('http://igvita.com/a/../? #test').should == igvita
-      n('http://igvita.com/ /../').should == igvita
+    it "cleans whitespace in PostRank::URIs" do
+      expect(n('http://igvita.com/a/../?  ')).to eq(igvita)
+      expect(n('http://igvita.com/a/../? #test')).to eq(igvita)
+      expect(n('http://igvita.com/ /../')).to eq(igvita)
     end
-    it "should default to http scheme if missing" do
-      n('igvita.com').should == igvita
-      n('https://test.com/').to_s.should == 'https://test.com/'
+    it "defaults to http scheme if missing" do
+      expect(n('igvita.com')).to eq(igvita)
+      expect(n('https://test.com/').to_s).to eq('https://test.com/')
     end
-    it "should downcase hostname" do
-      n('IGVITA.COM').should == igvita
-      n('IGVITA.COM/ABC').should == (igvita + "ABC")
+    it "downcases the hostname" do
+      expect(n('IGVITA.COM')).to eq(igvita)
+      expect(n('IGVITA.COM/ABC')).to eq(igvita + "ABC")
     end
+    it "removes trailing slash on paths" do
+      expect(n('http://igvita.com/')).to eq('http://igvita.com/')
+      expect(n('http://igvita.com/a')).to eq('http://igvita.com/a')
+      expect(n('http://igvita.com/a/')).to eq('http://igvita.com/a')
+      expect(n('http://igvita.com/a/b')).to eq('http://igvita.com/a/b')
+      expect(n('http://igvita.com/a/b/')).to eq('http://igvita.com/a/b')
+    end
+    it 'preserves nested urls' do
+      expect(n('http://igvita.com/a/b/http://hello.com')).to eq('http://igvita.com/a/b/http://hello.com')
+      expect(n('http://igvita.com/a//b/https://hello.com')).to eq('http://igvita.com/a/b/https://hello.com')
+    end
   end
   context "canonicalization" do
     def c(uri)
-      PostRank::URI.c18n(uri).to_s
+      PostRank::URI.c14n(uri).to_s
     end
     context "query parameters" do
       it "should handle nester parameters" do
-        c('igvita.com/?id=a&utm_source=a').should == 'http://igvita.com/?id=a'
+        expect(c('igvita.com/?id=a&utm_source=a')).to eq('http://igvita.com/?id=a')
       end
-      it "should preserve order of parameters" do
+      it "preserves the order of parameters" do
         url = 'http://a.com/?'+('a'..'z').to_a.shuffle.map {|e| "#{e}=#{e}"}.join("&")
-        c(url).should == url
+        expect(c(url)).to eq(url)
       end
-      it "should remove Google Analytics parameters" do
-        c('igvita.com/?id=a&utm_source=a').should == 'http://igvita.com/?id=a'
-        c('igvita.com/?id=a&utm_source=a&utm_valid').should == 'http://igvita.com/?id=a&utm_valid'
+      it "removes Google Analytics parameters" do
+        expect(c('igvita.com/?id=a&utm_source=a')).to eq('http://igvita.com/?id=a')
+        expect(c('igvita.com/?id=a&utm_source=a&utm_valid')).to eq('http://igvita.com/?id=a&utm_valid')
       end
-      it "should remove awesm/sms parameters" do
-        c('igvita.com/?id=a&utm_source=a&awesm=b').should == 'http://igvita.com/?id=a'
-        c('igvita.com/?id=a&sms_ss=a').should == 'http://igvita.com/?id=a'
+      it "removes awesm/sms parameters" do
+        expect(c('igvita.com/?id=a&utm_source=a&awesm=b')).to eq('http://igvita.com/?id=a')
+        expect(c('igvita.com/?id=a&sms_ss=a')).to eq('http://igvita.com/?id=a')
       end
+      it "removes PHPSESSID parameter" do
+        expect(c('http://www.nachi.org/forum?PHPSESSID=9ee2fb10b7274ef2b15d1d4006b8c8dd')).to eq('http://www.nachi.org/forum?')
+        expect(c('http://www.nachi.org/forum/?PHPSESSID=9ee2fb10b7274ef2b15d1d4006b8c8dd')).to eq('http://www.nachi.org/forum/?')
+        expect(c('http://www.nachi.org/forum?id=123&PHPSESSID=9ee2fb10b7274ef2b15d1d4006b8c8dd')).to eq('http://www.nachi.org/forum?id=123')
+      end
+    end
+    context "hashbang" do
+      it "rewrites twitter links to crawlable versions" do
+        expect(c('http://twitter.com/#!/igrigorik')).to eq('http://twitter.com/igrigorik')
+        expect(c('http://twitter.com/#!/a/statuses/1')).to eq('http://twitter.com/a/statuses/1')
+        expect(c('http://nontwitter.com/#!/a/statuses/1')).to eq('http://nontwitter.com/#!/a/statuses/1')
+      end
+    end
+    context "tumblr" do
+      it "strips the slug" do
+        expect(c('http://test.tumblr.com/post/4533459403/some-text')).to eq('http://test.tumblr.com/post/4533459403/')
+        expect(c('http://tumblr.com/xjl2evo3hh')).to eq('http://tumblr.com/xjl2evo3hh')
+      end
+    end
+    context "embedded links" do
+      it "extracts embedded redirects from Google News" do
+        u = c('http://news.google.com/news/url?sa=t&fd=R&&url=http://www.ctv.ca/CTVNews/Politics/20110111/')
+        expect(u).to eq('http://www.ctv.ca/CTVNews/Politics/20110111')
+      end
+      it "extracts embedded redirects from xfruits.com" do
+        u = c('http://xfruits.com/MrGroar/?url=http%3A%2F%2Faap.lesroyaumes.com%2Fdepeches%2Fdepeche351820908.html')
+        expect(u).to eq('http://aap.lesroyaumes.com/depeches/depeche351820908.html')
+      end
+      it "extracts embedded redirects from MySpace" do
+        u = c('http://www.myspace.com/Modules/PostTo/Pages/?u=http%3A%2F%2Fghanaian-chronicle.com%2Fnews%2Fother-news%2Fcanadian-high-commissioner-urges-media%2F&t=Canadian%20High%20Commissioner%20urges%20media')
+        expect(u).to eq('http://ghanaian-chronicle.com/news/other-news/canadian-high-commissioner-urges-media')
+      end
     end
   end
   context "clean" do
     def c(uri)
       PostRank::URI.clean(uri)
     end
-    it "should unescape, c18n and normalize" do
-      c('http://igvita.com/?id=1').should == 'http://igvita.com/?id=1'
-      c('igvita.com/?id=1').should == 'http://igvita.com/?id=1'
+    it "unescapes, canonicalizes and normalizes" do
+      expect(c('http://igvita.com/?id=1')).to eq('http://igvita.com/?id=1')
+      expect(c('igvita.com/?id=1')).to eq('http://igvita.com/?id=1')
+      expect(c('http://igvita.com/?id= 1')).to eq('http://igvita.com/?id=%201')
+      expect(c('http://igvita.com/?id=+1')).to eq('http://igvita.com/?id=%201')
+      expect(c('http://igvita.com/?id%3D%201')).to eq('http://igvita.com/?id=%201')
+      expect(c('igvita.com/a/..?id=1&utm_source=a&awesm=b#c')).to eq('http://igvita.com/?id=1')
-      c('http://igvita.com/?id= 1').should == 'http://igvita.com/?id=%201'
-      c('http://igvita.com/?id=+1').should == 'http://igvita.com/?id=%201'
-      c('http://igvita.com/?id%3D%201').should == 'http://igvita.com/?id=%201'
+      expect(c('igvita.com?id=<>')).to eq('http://igvita.com/?id=%3C%3E')
+      expect(c('igvita.com?id="')).to eq('http://igvita.com/?id=%22')
-      c('igvita.com/a/..?id=1&utm_source=a&awesm=b#c').should == 'http://igvita.com/?id=1'
+      expect(c('test.tumblr.com/post/23223/text-stub')).to eq('http://test.tumblr.com/post/23223')
+    end
-      c('igvita.com?id=<>').should == 'http://igvita.com/?id=%3C%3E'
-      c('igvita.com?id="').should == 'http://igvita.com/?id=%22'
+    it "cleans host specific parameters" do
+      YAML.load_file('spec/c14n_hosts.yml').each do |orig, clean|
+        expect(c(orig)).to eq(clean)
+      end
     end
-    it "should clean host specific parameters" do
-      YAML.load_file('spec/c18n_hosts.yml').each do |orig, clean|
-        c(orig).should == clean
+    context "reserved characters" do
+      it "preserves encoded question marks" do
+        expect(c('http://en.wikipedia.org/wiki/Whose_Line_Is_It_Anyway%3F_%28U.S._TV_series%29')).
+          to eq('http://en.wikipedia.org/wiki/Whose_Line_Is_It_Anyway%3F_(U.S._TV_series)')
+      end
+      it "preserves encoded ampersands" do
+        expect(c('http://example.com/?foo=BAR%26BAZ')).
+          to eq('http://example.com/?foo=BAR%26BAZ')
+      end
+      it "preserves consecutive reserved characters" do
+        expect(c('http://example.com/so-quizical%3F%3F%3F?foo=bar')).
+          to eq('http://example.com/so-quizical%3F%3F%3F?foo=bar')
       end
     end
+  end
+  context "hash" do
+    def h(uri, opts = {})
+      PostRank::URI.hash(uri, opts)
+    end
+    it "computes the MD5 hash without cleaning the URI" do
+      hash = '55fae8910d312b7878a3201ed653b881'
+      expect(h('http://everburning.com/feed/post/1')).to eq(hash)
+      expect(h('everburning.com/feed/post/1')).not_to eq(hash)
+    end
+    it "normalizes the URI if requested and compute MD5 hash" do
+      hash = '55fae8910d312b7878a3201ed653b881'
+      expect(h('http://EverBurning.Com/feed/post/1', :clean => true)).to eq(hash)
+      expect(h('Everburning.com/feed/post/1', :clean => true)).to eq(hash)
+      expect(h('everburning.com/feed/post/1', :clean => true)).to eq(hash)
+      expect(h('everburning.com/feed/post/1/', :clean => true)).to eq(hash)
+    end
   end
   context "extract" do
@@ -150,37 +242,150 @@ describe PostRank::URI do
     end
     context "TLDs" do
-      it "should not pick up bad grammar as a domain name and think it has a link" do
-        e("yah.lets").should be_empty
+      it "does not pick up bad grammar as a domain name and think it has a link" do
+        expect(e("yah.lets")).to be_empty
       end
-      it "should not pickup bad TLDS" do
-        e('stuff.zz a.b.c d.zq').should be_empty
+      it "does not pickup bad TLDS" do
+        expect(e('stuff.zz a.b.c d.zq')).to be_empty
       end
     end
-    it "should handle a URL that comes after text without a space" do
-      e("text:http://spn.tw/tfnLT").should include("http://spn.tw/tfnLT")
-      e("text;http://spn.tw/tfnLT").should include("http://spn.tw/tfnLT")
-      e("text.http://spn.tw/tfnLT").should include("http://spn.tw/tfnLT")
-      e("text-http://spn.tw/tfnLT").should include("http://spn.tw/tfnLT")
+    it "extracts twitter links with hashbangs" do
+      expect(e('test http://twitter.com/#!/igrigorik')).to include('http://twitter.com/igrigorik')
     end
-    it "should not pick up anything on or after the first . in the path of a URL with a shortener domain" do
-      e("http://bit.ly/9cJ2mz......if ur pickin up anythign here, u FAIL.").should == ["http://bit.ly/9cJ2mz"]
+    it "extracts mobile twitter links with hashbangs" do
+      expect(e('test http://mobile.twitter.com/#!/_mm6')).to include('http://mobile.twitter.com/_mm6')
     end
-    it "should pickup urls without protocol" do
+    it "handles a URL that comes after text without a space" do
+      expect(e("text:http://spn.tw/tfnLT")).to include("http://spn.tw/tfnLT")
+      expect(e("text;http://spn.tw/tfnLT")).to include("http://spn.tw/tfnLT")
+      expect(e("text.http://spn.tw/tfnLT")).to include("http://spn.tw/tfnLT")
+      expect(e("text-http://spn.tw/tfnLT")).to include("http://spn.tw/tfnLT")
+    end
+    it "does not pick up anything on or after the first . in the path of a URL with a shortener domain" do
+      expect(e("http://bit.ly/9cJ2mz......if ur pickin up anythign here, u FAIL.")).to eq(["http://bit.ly/9cJ2mz"])
+    end
+    it "picks up urls without protocol" do
       u = e('abc.com abc.co')
-      u.should include('http://abc.com/')
-      u.should include('http://abc.co/')
+      expect(u).to include('http://abc.com/')
+      expect(u).to include('http://abc.co/')
+    end
+    it "picks up urls inside tags" do
+      u = e("<a href='http://bit.ly/3fds3'>abc.com</a>")
+      expect(u).to include('http://abc.com/')
     end
     context "multibyte characters" do
-      it "should stop extracting URLs at the full-width CJK space character" do
-        e("http://www.youtube.com/watch?v=w_j4Lda25jA　　とんかつ定食").should == ["http://www.youtube.com/watch?v=w_j4Lda25jA"]
+      it "stops extracting URLs at the full-width CJK space character" do
+        expect(e("http://www.youtube.com/watch?v=w_j4Lda25jA　　とんかつ定食")).to eq(["http://www.youtube.com/watch?v=w_j4Lda25jA"])
+      end
+    end
+  end
+  context "href extract" do
+    it "extracts links from html text" do
+      g,b = PostRank::URI.extract_href("<a href='google.com'>link to google</a> with text <a href='b.com'>stuff</a>")
+      expect(g.first).to eq('http://google.com/')
+      expect(b.first).to eq('http://b.com/')
+      expect(g.last).to eq('link to google')
+      expect(b.last).to eq('stuff')
+    end
+    it "handles empty hrefs" do
+      expect do
+        l = PostRank::URI.extract_href("<a>link to google</a> with text <a href=''>stuff</a>")
+        expect(l).to be_empty
+      end.not_to raise_error
+    end
+    context "relative paths" do
+      it "rejects relative paths" do
+        l = PostRank::URI.extract_href("<a href='/stuff'>link to stuff</a>")
+        expect(l).to be_empty
+      end
+      it "resolves relative paths if host is provided" do
+        i = PostRank::URI.extract_href("<a href='/stuff'>link to stuff</a>", "igvita.com").first
+        expect(i.first).to eq('http://igvita.com/stuff')
+        expect(i.last).to eq('link to stuff')
+      end
+    end
+    context "domain extraction" do
+      url_list = {
+        "http://alex.pages.example.com" => "example.com",
+        "alex.pages.example.com" => "example.com",
+        "http://example.com/2011/04/01/blah" => "example.com",
+        "http://example.com" => "example.com",
+        "example.com" => "example.com",
+        "ExampLe.com" => "example.com",
+        "ExampLe.com:3000" => "example.com",
+        "http://alex.pages.example.COM" => "example.com",
+        "http://www.example.ag.it/2011/04/01/blah" => "example.ag.it",
+        "ftp://www.example.com/2011/04/01/blah" => 'example.com',
+        "http://com" => nil,
+        "http://alex.pages.examplecom" => nil,
+        "example" => nil,
+        "http://127.0.0.1" => nil,
+        "localhost" => nil,
+        "hello-there.com/you" => "hello-there.com"
+      }
+      url_list.each_pair do |url, expected_result|
+        it "extracts #{expected_result.inspect} from #{url}" do
+          u = PostRank::URI.clean(url, :raw => true)
+          expect(u.domain).to eq(expected_result)
+        end
       end
     end
   end
-end
+  context "parse" do
+    it 'does not fail on large host-part look-alikes' do
+      expect(PostRank::URI.parse('a'*64+'.ca').host).to eq(nil)
+    end
+    it 'does not pancake javascript scheme URIs' do
+      expect(PostRank::URI.parse('javascript:void(0);').scheme).to eq('javascript')
+    end
+    it 'does not pancake mailto scheme URIs' do
+      expect(PostRank::URI.parse('mailto:void(0);').scheme).to eq('mailto')
+    end
+    it 'does not pancake xmpp scheme URIs' do
+      expect(PostRank::URI.parse('xmpp:void(0);').scheme).to eq('xmpp')
+    end
+  end
+  context 'valid?' do
+    it 'marks incomplete URI string as invalid' do
+      expect(PostRank::URI.valid?('/path/page.html')).to be false
+    end
+    it 'marks www.test.c as invalid' do
+      expect(PostRank::URI.valid?('http://www.test.c')).to be false
+    end
+    it 'marks www.test.com as valid' do
+      expect(PostRank::URI.valid?('http://www.test.com')).to be true
+    end
+    it 'marks Unicode domain as valid (NOTE: works only with a scheme)' do
+      expect(PostRank::URI.valid?('http://президент.рф')).to be true
+    end
+    it 'marks punycode domain domain as valid' do
+      expect(PostRank::URI.valid?('xn--d1abbgf6aiiy.xn--p1ai')).to be true
+    end
+  end
+end

metadata CHANGED Viewed

@@ -1,114 +1,158 @@
---- !ruby/object:Gem::Specification
+--- !ruby/object:Gem::Specification
 name: postrank-uri
-version: !ruby/object:Gem::Version
-  prerelease: false
-  segments:
-  - 1
-  - 0
-  - 0
-  version: 1.0.0
+version: !ruby/object:Gem::Version
+  version: 1.0.24
 platform: ruby
-authors:
+authors:
 - Ilya Grigorik
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-01-20 00:00:00 -05:00
-default_executable:
-dependencies:
-- !ruby/object:Gem::Dependency
+date: 2019-04-03 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
   name: addressable
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 2.4.0
+  type: :runtime
   prerelease: false
-  requirement: &id001 !ruby/object:Gem::Requirement
-    none: false
-    requirements:
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 2.4.0
+- !ruby/object:Gem::Dependency
+  name: public_suffix
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
     - - ">="
-      - !ruby/object:Gem::Version
-        segments:
-        - 0
-        version: "0"
+      - !ruby/object:Gem::Version
+        version: 2.0.0
+    - - "<"
+      - !ruby/object:Gem::Version
+        version: '2.1'
   type: :runtime
-  version_requirements: *id001
-- !ruby/object:Gem::Dependency
-  name: domainatrix
   prerelease: false
-  requirement: &id002 !ruby/object:Gem::Requirement
-    none: false
-    requirements:
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
     - - ">="
-      - !ruby/object:Gem::Version
-        segments:
-        - 0
-        version: "0"
+      - !ruby/object:Gem::Version
+        version: 2.0.0
+    - - "<"
+      - !ruby/object:Gem::Version
+        version: '2.1'
+- !ruby/object:Gem::Dependency
+  name: nokogiri
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 1.8.0
   type: :runtime
-  version_requirements: *id002
-- !ruby/object:Gem::Dependency
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 1.8.0
+- !ruby/object:Gem::Dependency
+  name: rake
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
   name: rspec
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
   prerelease: false
-  requirement: &id003 !ruby/object:Gem::Requirement
-    none: false
-    requirements:
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: appraisal
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
     - - ">="
-      - !ruby/object:Gem::Version
-        segments:
-        - 0
-        version: "0"
+      - !ruby/object:Gem::Version
+        version: 2.0.0
+    - - "<"
+      - !ruby/object:Gem::Version
+        version: '3.0'
   type: :development
-  version_requirements: *id003
-description: URI normalization, c18n, escaping, and extraction
-email:
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 2.0.0
+    - - "<"
+      - !ruby/object:Gem::Version
+        version: '3.0'
+description: URI normalization, c14n, escaping, and extraction
+email:
 - ilya@igvita.com
 executables: []
 extensions: []
 extra_rdoc_files: []
-files:
+files:
+- ".gitignore"
+- ".rspec"
+- ".travis.yml"
+- Appraisals
 - Gemfile
+- LICENSE
 - README.md
 - Rakefile
 - lib/postrank-uri.rb
-- lib/postrank-uri/c18n.yml
+- lib/postrank-uri/c14n.yml
 - lib/postrank-uri/version.rb
 - postrank-uri.gemspec
-- spec/c18n_hosts.yml
+- spec/c14n_hosts.yml
 - spec/helper.rb
 - spec/postrank-uri_spec.rb
-has_rdoc: true
-homepage: http://rubygems.org/gems/postrank-uri
-licenses: []
+homepage: http://github.com/postrank-labs/postrank-uri
+licenses:
+- MIT
+metadata: {}
 post_install_message:
 rdoc_options: []
-require_paths:
+require_paths:
 - lib
-required_ruby_version: !ruby/object:Gem::Requirement
-  none: false
-  requirements:
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
   - - ">="
-    - !ruby/object:Gem::Version
-      segments:
-      - 0
-      version: "0"
-required_rubygems_version: !ruby/object:Gem::Requirement
-  none: false
-  requirements:
+    - !ruby/object:Gem::Version
+      version: 2.3.0
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
   - - ">="
-    - !ruby/object:Gem::Version
-      segments:
-      - 0
-      version: "0"
+    - !ruby/object:Gem::Version
+      version: '0'
 requirements: []
 rubyforge_project: postrank-uri
-rubygems_version: 1.3.7
+rubygems_version: 2.6.11
 signing_key:
-specification_version: 3
-summary: URI normalization, c18n, escaping, and extraction
-test_files:
-- spec/c18n_hosts.yml
+specification_version: 4
+summary: URI normalization, c14n, escaping, and extraction
+test_files:
+- spec/c14n_hosts.yml
 - spec/helper.rb
 - spec/postrank-uri_spec.rb