domainatrix 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
data/README.textile CHANGED
@@ -4,11 +4,11 @@ h1. Domainatrix
4
4
 
5
5
  h2. Summary
6
6
 
7
- A cruel mistress that uses the public suffix domain list to dominate URLs by canonicalizing, finding TLDs, and breaking them into their domain parts.
7
+ A cruel mistress that uses the public suffix domain list to dominate URLs by canonicalizing, finding public suffixes, and breaking them into their domain parts.
8
8
 
9
9
  h2. Description
10
10
 
11
- This simple library can parse a URL into its canonical form. It uses the list of domains from "http://publicsuffix.org":http://publicsuffix.org to break the domain into its tld, domain, and subdomain.
11
+ This simple library can parse a URL into its canonical form. It uses the list of domains from "http://publicsuffix.org":http://publicsuffix.org to break the domain into its public suffix, domain, and subdomain.
12
12
 
13
13
  h2. Installation
14
14
 
@@ -24,12 +24,12 @@ require 'domainatrix'
24
24
 
25
25
  url = Domainatrix.parse("http://www.pauldix.net")
26
26
  url.url # => "http://www.pauldix.net" (the original url)
27
- url.tld # => "net"
27
+ url.public_suffix # => "net"
28
28
  url.domain # => "pauldix"
29
29
  url.canonical # => "net.pauldix"
30
30
 
31
31
  url = Domainatrix.parse("http://foo.bar.pauldix.co.uk/asdf.html?q=arg")
32
- url.tld # => "co.uk"
32
+ url.public_suffix # => "co.uk"
33
33
  url.domain # => "pauldix"
34
34
  url.subdomain # => "foo.bar"
35
35
  url.path # => "/asdf.html?q=arg"
data/lib/domainatrix.rb CHANGED
@@ -5,7 +5,7 @@ require 'domainatrix/domain_parser.rb'
5
5
  require 'domainatrix/url.rb'
6
6
 
7
7
  module Domainatrix
8
- VERSION = "0.0.5"
8
+ VERSION = "0.0.6"
9
9
 
10
10
  def self.parse(url)
11
11
  @domain_parser ||= DomainParser.new("#{File.dirname(__FILE__)}/effective_tld_names.dat")
@@ -1,9 +1,9 @@
1
1
  module Domainatrix
2
2
  class DomainParser
3
- attr_reader :tlds
3
+ attr_reader :public_suffixes
4
4
 
5
5
  def initialize(file_name)
6
- @tlds = {}
6
+ @public_suffixes = {}
7
7
  read_dat_file(file_name)
8
8
  end
9
9
 
@@ -13,7 +13,7 @@ module Domainatrix
13
13
  unless (line =~ /\/\//) || line.empty?
14
14
  parts = line.split(".").reverse
15
15
 
16
- sub_hash = @tlds
16
+ sub_hash = @public_suffixes
17
17
  parts.each do |part|
18
18
  sub_hash = (sub_hash[part] ||= {})
19
19
  end
@@ -33,31 +33,31 @@ module Domainatrix
33
33
 
34
34
  def parse_domains_from_host(host)
35
35
  parts = host.split(".").reverse
36
- tld = []
36
+ public_suffix = []
37
37
  domain = ""
38
38
  subdomains = []
39
- sub_hash = @tlds
39
+ sub_hash = @public_suffixes
40
40
  parts.each_index do |i|
41
41
  part = parts[i]
42
42
 
43
43
  sub_parts = sub_hash[part]
44
44
  sub_hash = sub_parts
45
45
  if sub_parts.has_key? "*"
46
- tld << part
47
- tld << parts[i+1]
46
+ public_suffix << part
47
+ public_suffix << parts[i+1]
48
48
  domain = parts[i+2]
49
49
  subdomains = parts.slice(i+3, parts.size)
50
50
  break
51
51
  elsif sub_parts.empty? || !sub_parts.has_key?(parts[i+1])
52
- tld << part
52
+ public_suffix << part
53
53
  domain = parts[i+1]
54
54
  subdomains = parts.slice(i+2, parts.size)
55
55
  break
56
56
  else
57
- tld << part
57
+ public_suffix << part
58
58
  end
59
59
  end
60
- {:tld => tld.reverse.join("."), :domain => domain, :subdomain => subdomains.reverse.join(".")}
60
+ {:public_suffix => public_suffix.reverse.join("."), :domain => domain, :subdomain => subdomains.reverse.join(".")}
61
61
  end
62
62
  end
63
63
  end
@@ -1,18 +1,18 @@
1
1
  module Domainatrix
2
2
  class Url
3
- attr_reader :tld, :domain, :subdomain, :path, :url
3
+ attr_reader :public_suffix, :domain, :subdomain, :path, :url
4
4
 
5
5
  def initialize(attrs = {})
6
6
  @url = attrs[:url]
7
- @tld = attrs[:tld]
7
+ @public_suffix = attrs[:public_suffix]
8
8
  @domain = attrs[:domain]
9
9
  @subdomain = attrs[:subdomain]
10
10
  @path = attrs[:path]
11
11
  end
12
12
 
13
13
  def canonical(options = {})
14
- tld_parts = @tld.split(".")
15
- url = "#{tld_parts.reverse.join(".")}.#{@domain}"
14
+ public_suffix_parts = @public_suffix.split(".")
15
+ url = "#{public_suffix_parts.reverse.join(".")}.#{@domain}"
16
16
  if @subdomain && !@subdomain.empty?
17
17
  subdomain_parts = @subdomain.split(".")
18
18
  url << ".#{subdomain_parts.reverse.join(".")}"
@@ -7,20 +7,20 @@ describe "domain parser" do
7
7
 
8
8
  describe "reading the dat file" do
9
9
  it "creates a tree of the domain names" do
10
- @domain_parser.tlds.should be_a Hash
10
+ @domain_parser.public_suffixes.should be_a Hash
11
11
  end
12
12
 
13
13
  it "creates the first level of the tree" do
14
- @domain_parser.tlds.should have_key("com")
14
+ @domain_parser.public_suffixes.should have_key("com")
15
15
  end
16
16
 
17
17
  it "creates the first level of the tree even when the first doesn't appear on a line by itself" do
18
- @domain_parser.tlds.should have_key("uk")
18
+ @domain_parser.public_suffixes.should have_key("uk")
19
19
  end
20
20
 
21
21
  it "creates lower levels of the tree" do
22
- @domain_parser.tlds["jp"].should have_key("ac")
23
- @domain_parser.tlds["jp"]["aichi"].should have_key("*")
22
+ @domain_parser.public_suffixes["jp"].should have_key("ac")
23
+ @domain_parser.public_suffixes["jp"]["aichi"].should have_key("*")
24
24
  end
25
25
  end
26
26
 
@@ -40,10 +40,10 @@ describe "domain parser" do
40
40
  end
41
41
 
42
42
  it "parses the tld" do
43
- @domain_parser.parse("http://pauldix.net")[:tld].should == "net"
44
- @domain_parser.parse("http://pauldix.co.uk")[:tld].should == "co.uk"
45
- @domain_parser.parse("http://pauldix.com.kg")[:tld].should == "com.kg"
46
- @domain_parser.parse("http://pauldix.com.aichi.jp")[:tld].should == "com.aichi.jp"
43
+ @domain_parser.parse("http://pauldix.net")[:public_suffix].should == "net"
44
+ @domain_parser.parse("http://pauldix.co.uk")[:public_suffix].should == "co.uk"
45
+ @domain_parser.parse("http://pauldix.com.kg")[:public_suffix].should == "com.kg"
46
+ @domain_parser.parse("http://pauldix.com.aichi.jp")[:public_suffix].should == "com.aichi.jp"
47
47
  end
48
48
 
49
49
  it "should have the domain" do
@@ -5,8 +5,8 @@ describe "url" do
5
5
  Domainatrix::Url.new(:url => "http://pauldix.net").url.should == "http://pauldix.net"
6
6
  end
7
7
 
8
- it "has the tld" do
9
- Domainatrix::Url.new(:tld => "net").tld.should == "net"
8
+ it "has the public_suffix" do
9
+ Domainatrix::Url.new(:public_suffix => "net").public_suffix.should == "net"
10
10
  end
11
11
 
12
12
  it "has the domain" do
@@ -22,20 +22,20 @@ describe "url" do
22
22
  end
23
23
 
24
24
  it "canonicalizes the url" do
25
- Domainatrix::Url.new(:domain => "pauldix", :tld => "net").canonical.should == "net.pauldix"
26
- Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :tld => "net").canonical.should == "net.pauldix.foo"
27
- Domainatrix::Url.new(:subdomain => "foo.bar", :domain => "pauldix", :tld => "net").canonical.should == "net.pauldix.bar.foo"
28
- Domainatrix::Url.new(:domain => "pauldix", :tld => "co.uk").canonical.should == "uk.co.pauldix"
29
- Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :tld => "co.uk").canonical.should == "uk.co.pauldix.foo"
30
- Domainatrix::Url.new(:subdomain => "foo.bar", :domain => "pauldix", :tld => "co.uk").canonical.should == "uk.co.pauldix.bar.foo"
31
- Domainatrix::Url.new(:subdomain => "", :domain => "pauldix", :tld => "co.uk").canonical.should == "uk.co.pauldix"
25
+ Domainatrix::Url.new(:domain => "pauldix", :public_suffix => "net").canonical.should == "net.pauldix"
26
+ Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :public_suffix => "net").canonical.should == "net.pauldix.foo"
27
+ Domainatrix::Url.new(:subdomain => "foo.bar", :domain => "pauldix", :public_suffix => "net").canonical.should == "net.pauldix.bar.foo"
28
+ Domainatrix::Url.new(:domain => "pauldix", :public_suffix => "co.uk").canonical.should == "uk.co.pauldix"
29
+ Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :public_suffix => "co.uk").canonical.should == "uk.co.pauldix.foo"
30
+ Domainatrix::Url.new(:subdomain => "foo.bar", :domain => "pauldix", :public_suffix => "co.uk").canonical.should == "uk.co.pauldix.bar.foo"
31
+ Domainatrix::Url.new(:subdomain => "", :domain => "pauldix", :public_suffix => "co.uk").canonical.should == "uk.co.pauldix"
32
32
  end
33
33
 
34
34
  it "canonicalizes the url with the path" do
35
- Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :tld => "net", :path => "/hello").canonical.should == "net.pauldix.foo/hello"
35
+ Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :public_suffix => "net", :path => "/hello").canonical.should == "net.pauldix.foo/hello"
36
36
  end
37
37
 
38
38
  it "canonicalizes the url without the path" do
39
- Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :tld => "net").canonical(:include_path => false).should == "net.pauldix.foo"
39
+ Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :public_suffix => "net").canonical(:include_path => false).should == "net.pauldix.foo"
40
40
  end
41
41
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: domainatrix
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul Dix
@@ -59,6 +59,6 @@ rubyforge_project:
59
59
  rubygems_version: 1.3.5
60
60
  signing_key:
61
61
  specification_version: 2
62
- summary: A cruel mistress that uses the public suffix domain list to dominate URLs by canonicalizing, finding TLDs, and breaking them into their domain parts.
62
+ summary: A cruel mistress that uses the public suffix domain list to dominate URLs by canonicalizing, finding the public suffix, and breaking them into their domain parts.
63
63
  test_files: []
64
64