shadowbq-domainatrix 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +4 -0
 - data/README.textile +88 -0
 - data/lib/domainatrix/domain_parser.rb +153 -0
 - data/lib/domainatrix/url.rb +51 -0
 - data/lib/domainatrix.rb +48 -0
 - data/lib/effective_tld_names.dat +6868 -0
 - data/spec/domainatrix/domain_parser_spec.rb +157 -0
 - data/spec/domainatrix/url_spec.rb +64 -0
 - data/spec/domainatrix_spec.rb +106 -0
 - data/spec/spec.opts +3 -0
 - data/spec/spec_helper.rb +10 -0
 - metadata +95 -0
 
| 
         @@ -0,0 +1,157 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # -*- encoding : utf-8 -*-
         
     | 
| 
      
 2 
     | 
    
         
            +
            require File.dirname(__FILE__) + '/../spec_helper'
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            # Specs for Domainatrix::DomainParser. A single parser instance is built
            # once (before(:all)) from the effective_tld_names.dat file shipped in lib/
            # and shared, via @domain_parser, by every example below.
            describe "domain parser" do
              before(:all) do
                @domain_parser = Domainatrix::DomainParser.new("#{File.dirname(__FILE__)}/../../lib/effective_tld_names.dat")
              end

              # The dat file is exposed as nested Hashes through #public_suffixes,
              # keyed from the top-level label downwards ("jp" -> "kawasaki" -> "*").
              describe "reading the dat file" do
                it "creates a tree of the domain names" do
                  @domain_parser.public_suffixes.should be_a Hash
                end

                it "creates the first level of the tree" do
                  @domain_parser.public_suffixes.should have_key("com")
                end

                it "creates the first level of the tree even when the first doesn't appear on a line by itself" do
                  @domain_parser.public_suffixes.should have_key("uk")
                end

                it "creates lower levels of the tree" do
                  @domain_parser.public_suffixes["jp"].should have_key("ac")
                  @domain_parser.public_suffixes["jp"]["kawasaki"].should have_key("*")
                end
              end

              # #parse is expected to return a Hash with (at least) the keys
              # :url, :scheme, :host, :path, :public_suffix, :domain, :subdomain
              # and :ip_address, and to raise Domainatrix::ParseError on bad input.
              describe "parsing" do
                it "returns a hash of parts" do
                  @domain_parser.parse("http://pauldix.net").should be_a Hash
                end

                it "includes the original url" do
                  # Note the trailing slash: the expected :url is a normalized form of the input.
                  @domain_parser.parse("http://www.pauldix.net")[:url].should == "http://www.pauldix.net/"
                end

                it "includes the scheme" do
                  @domain_parser.parse("http://www.pauldix.net")[:scheme].should == "http"
                end

                it "includes the full host" do
                  @domain_parser.parse("http://www.pauldix.net")[:host].should == "www.pauldix.net"
                end

                it "parses out the path" do
                  # :path is expected to carry the query string and fragment as well.
                  @domain_parser.parse("http://pauldix.net/foo.html?asdf=foo#bar")[:path].should == "/foo.html?asdf=foo#bar"
                  @domain_parser.parse("http://pauldix.net/foo.html?asdf=foo")[:path].should == "/foo.html?asdf=foo"
                  @domain_parser.parse("http://pauldix.net?asdf=foo")[:path].should == "?asdf=foo"
                  @domain_parser.parse("http://pauldix.net")[:path].should == ""
                end

                it "parses the tld" do
                  @domain_parser.parse("http://pauldix.net")[:public_suffix].should == "net"
                  @domain_parser.parse("http://pauldix.co.uk")[:public_suffix].should == "co.uk"
                  @domain_parser.parse("http://pauldix.com.kg")[:public_suffix].should == "com.kg"
                  @domain_parser.parse("http://pauldix.com.kawasaki.jp")[:public_suffix].should == "com.kawasaki.jp"
                end

                it "should have the domain" do
                  @domain_parser.parse("http://pauldix.net")[:domain].should == "pauldix"
                  @domain_parser.parse("http://foo.pauldix.net")[:domain].should == "pauldix"
                  @domain_parser.parse("http://pauldix.co.uk")[:domain].should == "pauldix"
                  @domain_parser.parse("http://foo.pauldix.co.uk")[:domain].should == "pauldix"
                  @domain_parser.parse("http://pauldix.com.kg")[:domain].should == "pauldix"
                  @domain_parser.parse("http://pauldix.com.kawasaki.jp")[:domain].should == "pauldix"
                end

                it "should have subdomains" do
                  @domain_parser.parse("http://foo.pauldix.net")[:subdomain].should == "foo"
                  @domain_parser.parse("http://bar.foo.pauldix.co.uk")[:subdomain].should == "bar.foo"
                end

                it "parses a link to localhost" do
                  parsed = @domain_parser.parse("http://localhost")
                  parsed[:host].should == "localhost"
                  parsed[:url].should == "http://localhost/"
                  parsed[:domain].should == "localhost"
                  parsed[:public_suffix].should == ""
                end

                it "should accept wildcards" do
                  # A literal "*" is accepted in the subdomain, public suffix and path positions.
                  @domain_parser.parse("http://*.pauldix.net")[:subdomain].should == "*"
                  @domain_parser.parse("http://pauldix.*")[:public_suffix].should == "*"
                  @domain_parser.parse("http://pauldix.net/*")[:path].should == "/*"

                  combined = @domain_parser.parse("http://*.pauldix.*/*")
                  combined[:subdomain].should == "*"
                  combined[:domain].should == "pauldix"
                  combined[:public_suffix].should == "*"
                  combined[:path].should == "/*"
                end

                it "should parse a URL if it has a wildcard exception" do
                  @domain_parser.parse("http://metro.tokyo.jp")[:domain].should == "metro"
                end

                it "should throw an exception if the tld is not valid" do
                  lambda { @domain_parser.parse("http://pauldix.nett") }.should raise_error(Domainatrix::ParseError)
                end

                it "should throw an exception if the domain doesn't contain a valid host" do
                  lambda { @domain_parser.parse("http://co.jp") }.should raise_error(Domainatrix::ParseError)
                end

                it "should throw an exception if the domain contains an invalid character" do
                  lambda { @domain_parser.parse("http://pauldix,net") }.should raise_error(Domainatrix::ParseError)
                end

                it "should throw an exception if the url is malformed" do
                  lambda { @domain_parser.parse("http:/") }.should raise_error(Domainatrix::ParseError)
                end

                it "parses an ip address" do
                  # Parse once and inspect the single result instead of re-parsing per assertion.
                  parsed = @domain_parser.parse("http://123.123.123.123/foo/bar")
                  parsed[:domain].should == "123.123.123.123"
                  parsed[:path].should == "/foo/bar"
                  parsed[:ip_address].should == true
                end

                it "parses a host with numeric domain" do
                  # All-numeric labels under a real public suffix are a hostname, not an IP.
                  parsed = @domain_parser.parse("http://123.123.123.co.uk/foo/bar")
                  parsed[:subdomain].should == "123.123"
                  parsed[:domain].should == "123"
                  parsed[:public_suffix].should == "co.uk"
                  parsed[:ip_address].should == false
                end

                it "should not parse an invalid ip address" do
                  lambda { @domain_parser.parse("http://12345") }.should raise_error(Domainatrix::ParseError)
                end

                it "defaults to http if no scheme is applied" do
                  parsed = @domain_parser.parse("www.pauldix.net")
                  parsed[:host].should == "www.pauldix.net"
                  parsed[:scheme].should == "http"
                end
              end

              describe "handling utf-8" do
                it "handles public suffixes with utf-8" do
                  @domain_parser.parse("http://pauldix.السعوديه")[:public_suffix].should == "السعوديه"
                  @domain_parser.parse("http://pauldix.臺灣")[:public_suffix].should == "臺灣"
                  @domain_parser.parse("http://pauldix.السعوديه")[:domain].should == "pauldix"
                  @domain_parser.parse("http://pauldix.臺灣")[:domain].should == "pauldix"
                end

                it "handles unicode urls as puny code" do
                  # :url is expected in punycode form while :host keeps the original characters.
                  input = "http://✪df.ws/fil"
                  parsed = @domain_parser.parse(input)
                  parsed[:url].should == "http://xn--df-oiy.ws/fil"
                  parsed[:host].should == "✪df.ws"
                  parsed[:path].should == "/fil"
                  parsed[:public_suffix].should == "ws"
                end
              end
            end
         
     | 
| 
         @@ -0,0 +1,64 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require File.dirname(__FILE__) + '/../spec_helper'
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            # Specs for Domainatrix::Url: a value object built from a hash of URL parts
            # (:url, :scheme, :subdomain, :domain, :public_suffix, :path, :ip_address)
            # that exposes readers plus a few derived string forms.
            describe "url" do
              it "has the original url" do
                built = Domainatrix::Url.new(:url => "http://pauldix.net")
                built.url.should == "http://pauldix.net"
              end

              it "has the public_suffix" do
                built = Domainatrix::Url.new(:public_suffix => "net")
                built.public_suffix.should == "net"
              end

              it "has the domain" do
                built = Domainatrix::Url.new(:domain => "pauldix")
                built.domain.should == "pauldix"
              end

              it "has the subdomain" do
                built = Domainatrix::Url.new(:subdomain => "foo")
                built.subdomain.should == "foo"
              end

              it "has the path" do
                built = Domainatrix::Url.new(:path => "/asdf.html")
                built.path.should == "/asdf.html"
              end

              it "reports if it is an ip address" do
                built = Domainatrix::Url.new(:ip_address => true)
                built.ip_address.should == true
              end

              # The canonical form lists the labels in reverse order: public suffix
              # first, then the domain, then the subdomain labels (also reversed).
              it "canonicalizes the url" do
                plain = Domainatrix::Url.new(:domain => "pauldix", :public_suffix => "net")
                plain.canonical.should == "net.pauldix"

                one_sub = Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :public_suffix => "net")
                one_sub.canonical.should == "net.pauldix.foo"

                two_subs = Domainatrix::Url.new(:subdomain => "foo.bar", :domain => "pauldix", :public_suffix => "net")
                two_subs.canonical.should == "net.pauldix.bar.foo"

                plain_uk = Domainatrix::Url.new(:domain => "pauldix", :public_suffix => "co.uk")
                plain_uk.canonical.should == "uk.co.pauldix"

                one_sub_uk = Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :public_suffix => "co.uk")
                one_sub_uk.canonical.should == "uk.co.pauldix.foo"

                two_subs_uk = Domainatrix::Url.new(:subdomain => "foo.bar", :domain => "pauldix", :public_suffix => "co.uk")
                two_subs_uk.canonical.should == "uk.co.pauldix.bar.foo"

                # An empty subdomain must not leave a trailing dot.
                blank_sub_uk = Domainatrix::Url.new(:subdomain => "", :domain => "pauldix", :public_suffix => "co.uk")
                blank_sub_uk.canonical.should == "uk.co.pauldix"
              end

              it "canonicalizes the url with the path" do
                with_path = Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :public_suffix => "net", :path => "/hello")
                with_path.canonical.should == "net.pauldix.foo/hello"
              end

              it "canonicalizes the url without the path" do
                no_path = Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :public_suffix => "net")
                no_path.canonical(:include_path => false).should == "net.pauldix.foo"
              end

              it "combines the domain with the public_suffix" do
                simple = Domainatrix::Url.new(:domain => "pauldix", :public_suffix => "net")
                simple.domain_with_public_suffix.should == "pauldix.net"

                two_part = Domainatrix::Url.new(:domain => "foo", :public_suffix => "co.uk" )
                two_part.domain_with_public_suffix.should == "foo.co.uk"

                # The subdomain is not part of the combined value.
                with_sub = Domainatrix::Url.new(:subdomain => "baz", :domain => "bar", :public_suffix => "com")
                with_sub.domain_with_public_suffix.should == "bar.com"
              end

              # domain_with_tld is expected to give the same results as domain_with_public_suffix.
              it "combines the domain with the public_suffix as an alias" do
                simple = Domainatrix::Url.new(:domain => "pauldix", :public_suffix => "net")
                simple.domain_with_tld.should == "pauldix.net"

                two_part = Domainatrix::Url.new(:domain => "foo", :public_suffix => "co.uk" )
                two_part.domain_with_tld.should == "foo.co.uk"

                with_sub = Domainatrix::Url.new(:subdomain => "baz", :domain => "bar", :public_suffix => "com")
                with_sub.domain_with_tld.should == "bar.com"
              end

              it "converts the url to a string" do
                full = Domainatrix::Url.new(:scheme => "http", :subdomain => "www", :domain => "pauldix", :public_suffix => "net", :path => "/some/path")
                full.to_s.should == "http://www.pauldix.net/some/path"

                # Without a scheme there is no "scheme://" prefix.
                schemeless = Domainatrix::Url.new(:subdomain => "www", :domain => "pauldix", :public_suffix => "net", :path => "/some/path")
                schemeless.to_s.should == "www.pauldix.net/some/path"

                host_only = Domainatrix::Url.new(:domain => "pauldix", :public_suffix => "co.uk")
                host_only.to_s.should == "pauldix.co.uk"
              end
            end
         
     | 
| 
         @@ -0,0 +1,106 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require File.dirname(__FILE__) + '/spec_helper'
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            describe Domainatrix do
         
     | 
| 
      
 4 
     | 
    
         
            +
              describe ".parse" do
         
     | 
| 
      
 5 
     | 
    
         
            +
                it "should convert a string into a url object" do
         
     | 
| 
      
 6 
     | 
    
         
            +
                  Domainatrix.parse("http://pauldix.net").should be_a Domainatrix::Url
         
     | 
| 
      
 7 
     | 
    
         
            +
                end
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
                it "should canonicalize" do
         
     | 
| 
      
 10 
     | 
    
         
            +
                  Domainatrix.parse("http://pauldix.net").canonical.should == "net.pauldix"
         
     | 
| 
      
 11 
     | 
    
         
            +
                  Domainatrix.parse("http://pauldix.net/foo.html").canonical.should == "net.pauldix/foo.html"
         
     | 
| 
      
 12 
     | 
    
         
            +
                  Domainatrix.parse("http://pauldix.net/foo.html?asdf=bar").canonical.should == "net.pauldix/foo.html?asdf=bar"
         
     | 
| 
      
 13 
     | 
    
         
            +
                  Domainatrix.parse("http://foo.pauldix.net").canonical.should == "net.pauldix.foo"
         
     | 
| 
      
 14 
     | 
    
         
            +
                  Domainatrix.parse("http://foo.bar.pauldix.net").canonical.should == "net.pauldix.bar.foo"
         
     | 
| 
      
 15 
     | 
    
         
            +
                  Domainatrix.parse("http://pauldix.co.uk").canonical.should == "uk.co.pauldix"
         
     | 
| 
      
 16 
     | 
    
         
            +
                end
         
     | 
| 
      
 17 
     | 
    
         
            +
              end
         
     | 
| 
      
 18 
     | 
    
         
            +
             
     | 
| 
      
 19 
     | 
    
         
            +
              describe ".scan" do
         
     | 
| 
      
 20 
     | 
    
         
            +
                it "parses the url found in a string" do
         
     | 
| 
      
 21 
     | 
    
         
            +
                  input = "HAHA. This is why Conan should stay: http://losangeles.craigslist.org/sfv/clt/1551463643.html"
         
     | 
| 
      
 22 
     | 
    
         
            +
                  url = Domainatrix.scan(input).first
         
     | 
| 
      
 23 
     | 
    
         
            +
                  url.canonical.should == "org.craigslist.losangeles/sfv/clt/1551463643.html"
         
     | 
| 
      
 24 
     | 
    
         
            +
                end
         
     | 
| 
      
 25 
     | 
    
         
            +
             
     | 
| 
      
 26 
     | 
    
         
            +
                it "handles shouting" do
         
     | 
| 
      
 27 
     | 
    
         
            +
                  input = "TONIGHT!!  @chelseavperetti @toddglass @dougbenson @realjeffreyross ME and Tig Notaro   http://WWW.OPCCEVENTS.ORG/"
         
     | 
| 
      
 28 
     | 
    
         
            +
                  url = Domainatrix.scan(input).first
         
     | 
| 
      
 29 
     | 
    
         
            +
                  url.should_not be_nil
         
     | 
| 
      
 30 
     | 
    
         
            +
                  url.url.should == "http://www.opccevents.org/"
         
     | 
| 
      
 31 
     | 
    
         
            +
                end
         
     | 
| 
      
 32 
     | 
    
         
            +
             
     | 
| 
      
 33 
     | 
    
         
            +
             
     | 
| 
      
 34 
     | 
    
         
            +
                it "finds multiple urls in a string" do
         
     | 
| 
      
 35 
     | 
    
         
            +
                  input = <<-TEXT
         
     | 
| 
      
 36 
     | 
    
         
            +
                  http://google.com
         
     | 
| 
      
 37 
     | 
    
         
            +
                  and then http://yahoo.com
         
     | 
| 
      
 38 
     | 
    
         
            +
                  TEXT
         
     | 
| 
      
 39 
     | 
    
         
            +
                  google, yahoo = Domainatrix.scan(input)
         
     | 
| 
      
 40 
     | 
    
         
            +
                  google.domain.should == "google"
         
     | 
| 
      
 41 
     | 
    
         
            +
                  yahoo.domain.should == "yahoo"
         
     | 
| 
      
 42 
     | 
    
         
            +
                end
         
     | 
| 
      
 43 
     | 
    
         
            +
             
     | 
| 
      
 44 
     | 
    
         
            +
                it "returns a map of results when given a block" do
         
     | 
| 
      
 45 
     | 
    
         
            +
                  input = "http://a.com https://b.com"
         
     | 
| 
      
 46 
     | 
    
         
            +
                  domains = Domainatrix.scan(input) do |url|
         
     | 
| 
      
 47 
     | 
    
         
            +
                    url.domain
         
     | 
| 
      
 48 
     | 
    
         
            +
                  end
         
     | 
| 
      
 49 
     | 
    
         
            +
                  domains.should == %w(a b)
         
     | 
| 
      
 50 
     | 
    
         
            +
                end
         
     | 
| 
      
 51 
     | 
    
         
            +
             
     | 
| 
      
 52 
     | 
    
         
            +
                it "returns an empty array when no urls are found" do
         
     | 
| 
      
 53 
     | 
    
         
            +
                  Domainatrix.scan("Nope").should == []
         
     | 
| 
      
 54 
     | 
    
         
            +
                end
         
     | 
| 
      
 55 
     | 
    
         
            +
             
     | 
| 
      
 56 
     | 
    
         
            +
                it "removes unlikely characters from the end of URLs" do
         
     | 
| 
      
 57 
     | 
    
         
            +
                  input = <<-TEXT
         
     | 
| 
      
 58 
     | 
    
         
            +
                  Check out http://tobtr.com/s/821921.
         
     | 
| 
      
 59 
     | 
    
         
            +
                  Oh, and also (http://www.google.com): Cool stuff!
         
     | 
| 
      
 60 
     | 
    
         
            +
                  http://fora.tv/v/c8637, is almost as good as http://example.com...
         
     | 
| 
      
 61 
     | 
    
         
            +
                  http://foo.com" <http://baz.com>
         
     | 
| 
      
 62 
     | 
    
         
            +
                  TEXT
         
     | 
| 
      
 63 
     | 
    
         
            +
             
     | 
| 
      
 64 
     | 
    
         
            +
                  urls = Domainatrix.scan(input).map {|u| u.url}
         
     | 
| 
      
 65 
     | 
    
         
            +
                  urls.should == %w(http://tobtr.com/s/821921 http://www.google.com/ http://fora.tv/v/c8637 http://example.com/ http://foo.com/ http://baz.com/)
         
     | 
| 
      
 66 
     | 
    
         
            +
                end
         
     | 
| 
      
 67 
     | 
    
         
            +
             
     | 
| 
      
 68 
     | 
    
         
            +
              end
         
     | 
| 
      
 69 
     | 
    
         
            +
             
     | 
| 
      
 70 
     | 
    
         
            +
              context 'localhost with a port' do
         
     | 
| 
      
 71 
     | 
    
         
            +
                subject { Domainatrix.parse('localhost:3000') }
         
     | 
| 
      
 72 
     | 
    
         
            +
                its(:scheme) { should == 'http' }
         
     | 
| 
      
 73 
     | 
    
         
            +
                its(:host) { should == 'localhost' }
         
     | 
| 
      
 74 
     | 
    
         
            +
                its(:url) { should == 'http://localhost:3000/' }
         
     | 
| 
      
 75 
     | 
    
         
            +
                its(:public_suffix) { should == '' }
         
     | 
| 
      
 76 
     | 
    
         
            +
                its(:domain) { should == 'localhost' }
         
     | 
| 
      
 77 
     | 
    
         
            +
                its(:subdomain) { should == '' }
         
     | 
| 
      
 78 
     | 
    
         
            +
                its(:path) { should == '' }
         
     | 
| 
      
 79 
     | 
    
         
            +
                its(:domain_with_tld) { should == 'localhost' }
         
     | 
| 
      
 80 
     | 
    
         
            +
              end
         
     | 
| 
      
 81 
     | 
    
         
            +
             
     | 
| 
      
 82 
     | 
    
         
            +
              context 'without a scheme' do
         
     | 
| 
      
 83 
     | 
    
         
            +
                subject { Domainatrix.parse('www.pauldix.net') }
         
     | 
| 
      
 84 
     | 
    
         
            +
                its(:scheme) { should == 'http' }
         
     | 
| 
      
 85 
     | 
    
         
            +
                its(:host) { should == 'www.pauldix.net' }
         
     | 
| 
      
 86 
     | 
    
         
            +
                its(:url) { should == 'http://www.pauldix.net/' }
         
     | 
| 
      
 87 
     | 
    
         
            +
                its(:public_suffix) { should == 'net' }
         
     | 
| 
      
 88 
     | 
    
         
            +
                its(:domain) { should == 'pauldix' }
         
     | 
| 
      
 89 
     | 
    
         
            +
                its(:subdomain) { should == 'www' }
         
     | 
| 
      
 90 
     | 
    
         
            +
                its(:path) { should == '' }
         
     | 
| 
      
 91 
     | 
    
         
            +
                its(:domain_with_tld) { should == 'pauldix.net' }
         
     | 
| 
      
 92 
     | 
    
         
            +
              end
         
     | 
| 
      
 93 
     | 
    
         
            +
             
     | 
| 
      
 94 
     | 
    
         
            +
              context 'with a blank url' do
         
     | 
| 
      
 95 
     | 
    
         
            +
                subject { Domainatrix.parse(nil) }
         
     | 
| 
      
 96 
     | 
    
         
            +
                its(:scheme) { should == '' }
         
     | 
| 
      
 97 
     | 
    
         
            +
                its(:host) { should == '' }
         
     | 
| 
      
 98 
     | 
    
         
            +
                its(:url) { should == '' }
         
     | 
| 
      
 99 
     | 
    
         
            +
                its(:public_suffix) { should == '' }
         
     | 
| 
      
 100 
     | 
    
         
            +
                its(:domain) { should == '' }
         
     | 
| 
      
 101 
     | 
    
         
            +
                its(:subdomain) { should == '' }
         
     | 
| 
      
 102 
     | 
    
         
            +
                its(:path) { should == '' }
         
     | 
| 
      
 103 
     | 
    
         
            +
                its(:domain_with_tld) { should == '' }
         
     | 
| 
      
 104 
     | 
    
         
            +
              end
         
     | 
| 
      
 105 
     | 
    
         
            +
             
     | 
| 
      
 106 
     | 
    
         
            +
            end
         
     | 
    
        data/spec/spec.opts
    ADDED
    
    
    
        data/spec/spec_helper.rb
    ADDED
    
    | 
         @@ -0,0 +1,10 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require "rubygems"
         
     | 
| 
      
 2 
     | 
    
         
            +
            require "rspec"
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            # gem install redgreen for colored test output
         
     | 
| 
      
 5 
     | 
    
         
            +
            begin require "redgreen" unless ENV['TM_CURRENT_LINE']; rescue LoadError; end
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            path = File.expand_path(File.dirname(__FILE__) + "/../lib/")
         
     | 
| 
      
 8 
     | 
    
         
            +
            $LOAD_PATH.unshift(path) unless $LOAD_PATH.include?(path)
         
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
            require "#{File.dirname(__FILE__)}/../lib/domainatrix"
         
     | 
    
        metadata
    ADDED
    
    | 
         @@ -0,0 +1,95 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            --- !ruby/object:Gem::Specification
         
     | 
| 
      
 2 
     | 
    
         
            +
            name: shadowbq-domainatrix
         
     | 
| 
      
 3 
     | 
    
         
            +
            version: !ruby/object:Gem::Version
         
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.0.11
         
     | 
| 
      
 5 
     | 
    
         
            +
              prerelease: 
         
     | 
| 
      
 6 
     | 
    
         
            +
            platform: ruby
         
     | 
| 
      
 7 
     | 
    
         
            +
            authors:
         
     | 
| 
      
 8 
     | 
    
         
            +
            - Paul Dix
         
     | 
| 
      
 9 
     | 
    
         
            +
            - Brian John
         
     | 
| 
      
 10 
     | 
    
         
            +
            - Shadowbq
         
     | 
| 
      
 11 
     | 
    
         
            +
            - Menno van der Sman
         
     | 
| 
      
 12 
     | 
    
         
            +
            - Wouter Broekhof
         
     | 
| 
      
 13 
     | 
    
         
            +
            - Wilson
         
     | 
| 
      
 14 
     | 
    
         
            +
            autorequire: 
         
     | 
| 
      
 15 
     | 
    
         
            +
            bindir: bin
         
     | 
| 
      
 16 
     | 
    
         
            +
            cert_chain: []
         
     | 
| 
      
 17 
     | 
    
         
            +
            date: 2013-03-21 00:00:00.000000000 Z
         
     | 
| 
      
 18 
     | 
    
         
            +
            dependencies:
         
     | 
| 
      
 19 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 20 
     | 
    
         
            +
              name: addressable
         
     | 
| 
      
 21 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 22 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 23 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 24 
     | 
    
         
            +
                - - ! '>='
         
     | 
| 
      
 25 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 26 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 27 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 28 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 29 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 30 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 31 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 32 
     | 
    
         
            +
                - - ! '>='
         
     | 
| 
      
 33 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 34 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 35 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 36 
     | 
    
         
            +
              name: rspec
         
     | 
| 
      
 37 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 38 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 39 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 40 
     | 
    
         
            +
                - - ! '>='
         
     | 
| 
      
 41 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 42 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 43 
     | 
    
         
            +
              type: :development
         
     | 
| 
      
 44 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 45 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 46 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 47 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 48 
     | 
    
         
            +
                - - ! '>='
         
     | 
| 
      
 49 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 50 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 51 
     | 
    
         
            +
            description: 
         
     | 
| 
      
 52 
     | 
    
         
            +
            email:
         
     | 
| 
      
 53 
     | 
    
         
            +
            - shadowbq@gmail.com
         
     | 
| 
      
 54 
     | 
    
         
            +
            executables: []
         
     | 
| 
      
 55 
     | 
    
         
            +
            extensions: []
         
     | 
| 
      
 56 
     | 
    
         
            +
            extra_rdoc_files: []
         
     | 
| 
      
 57 
     | 
    
         
            +
            files:
         
     | 
| 
      
 58 
     | 
    
         
            +
            - lib/domainatrix.rb
         
     | 
| 
      
 59 
     | 
    
         
            +
            - lib/effective_tld_names.dat
         
     | 
| 
      
 60 
     | 
    
         
            +
            - lib/domainatrix/domain_parser.rb
         
     | 
| 
      
 61 
     | 
    
         
            +
            - lib/domainatrix/url.rb
         
     | 
| 
      
 62 
     | 
    
         
            +
            - CHANGELOG.md
         
     | 
| 
      
 63 
     | 
    
         
            +
            - README.textile
         
     | 
| 
      
 64 
     | 
    
         
            +
            - spec/spec.opts
         
     | 
| 
      
 65 
     | 
    
         
            +
            - spec/spec_helper.rb
         
     | 
| 
      
 66 
     | 
    
         
            +
            - spec/domainatrix_spec.rb
         
     | 
| 
      
 67 
     | 
    
         
            +
            - spec/domainatrix/domain_parser_spec.rb
         
     | 
| 
      
 68 
     | 
    
         
            +
            - spec/domainatrix/url_spec.rb
         
     | 
| 
      
 69 
     | 
    
         
            +
            homepage: http://github.com/shadowbq/domainatrix
         
     | 
| 
      
 70 
     | 
    
         
            +
            licenses: []
         
     | 
| 
      
 71 
     | 
    
         
            +
            post_install_message: 
         
     | 
| 
      
 72 
     | 
    
         
            +
            rdoc_options: []
         
     | 
| 
      
 73 
     | 
    
         
            +
            require_paths:
         
     | 
| 
      
 74 
     | 
    
         
            +
            - lib
         
     | 
| 
      
 75 
     | 
    
         
            +
            required_ruby_version: !ruby/object:Gem::Requirement
         
     | 
| 
      
 76 
     | 
    
         
            +
              none: false
         
     | 
| 
      
 77 
     | 
    
         
            +
              requirements:
         
     | 
| 
      
 78 
     | 
    
         
            +
              - - ! '>='
         
     | 
| 
      
 79 
     | 
    
         
            +
                - !ruby/object:Gem::Version
         
     | 
| 
      
 80 
     | 
    
         
            +
                  version: '0'
         
     | 
| 
      
 81 
     | 
    
         
            +
            required_rubygems_version: !ruby/object:Gem::Requirement
         
     | 
| 
      
 82 
     | 
    
         
            +
              none: false
         
     | 
| 
      
 83 
     | 
    
         
            +
              requirements:
         
     | 
| 
      
 84 
     | 
    
         
            +
              - - ! '>='
         
     | 
| 
      
 85 
     | 
    
         
            +
                - !ruby/object:Gem::Version
         
     | 
| 
      
 86 
     | 
    
         
            +
                  version: '0'
         
     | 
| 
      
 87 
     | 
    
         
            +
            requirements: []
         
     | 
| 
      
 88 
     | 
    
         
            +
            rubyforge_project: 
         
     | 
| 
      
 89 
     | 
    
         
            +
            rubygems_version: 1.8.24
         
     | 
| 
      
 90 
     | 
    
         
            +
            signing_key: 
         
     | 
| 
      
 91 
     | 
    
         
            +
            specification_version: 2
         
     | 
| 
      
 92 
     | 
    
         
            +
            summary: A cruel mistress that uses the public suffix domain list to dominate URLs
         
     | 
| 
      
 93 
     | 
    
         
            +
              by canonicalizing, finding the public suffix, and breaking them into their domain
         
     | 
| 
      
 94 
     | 
    
         
            +
              parts.
         
     | 
| 
      
 95 
     | 
    
         
            +
            test_files: []
         
     |