domainatrix 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.textile +4 -4
- data/lib/domainatrix.rb +1 -1
- data/lib/domainatrix/domain_parser.rb +10 -10
- data/lib/domainatrix/url.rb +4 -4
- data/spec/domainatrix/domain_parser_spec.rb +9 -9
- data/spec/domainatrix/url_spec.rb +11 -11
- metadata +2 -2
data/README.textile
CHANGED
@@ -4,11 +4,11 @@ h1. Domainatrix
|
|
4
4
|
|
5
5
|
h2. Summary
|
6
6
|
|
7
|
-
A cruel mistress that uses the public suffix domain list to dominate URLs by canonicalizing, finding
|
7
|
+
A cruel mistress that uses the public suffix domain list to dominate URLs by canonicalizing, finding public suffixes, and breaking them into their domain parts.
|
8
8
|
|
9
9
|
h2. Description
|
10
10
|
|
11
|
-
This simple library can parse a URL into its canonical form. It uses the list of domains from "http://publicsuffix.org":http://publicsuffix.org to break the domain into its
|
11
|
+
This simple library can parse a URL into its canonical form. It uses the list of domains from "http://publicsuffix.org":http://publicsuffix.org to break the domain into its public suffix, domain, and subdomain.
|
12
12
|
|
13
13
|
h2. Installation
|
14
14
|
|
@@ -24,12 +24,12 @@ require 'domainatrix'
|
|
24
24
|
|
25
25
|
url = Domainatrix.parse("http://www.pauldix.net")
|
26
26
|
url.url # => "http://www.pauldix.net" (the original url)
|
27
|
-
url.
|
27
|
+
url.public_suffix # => "net"
|
28
28
|
url.domain # => "pauldix"
|
29
29
|
url.canonical # => "net.pauldix"
|
30
30
|
|
31
31
|
url = Domainatrix.parse("http://foo.bar.pauldix.co.uk/asdf.html?q=arg")
|
32
|
-
url.
|
32
|
+
url.public_suffix # => "co.uk"
|
33
33
|
url.domain # => "pauldix"
|
34
34
|
url.subdomain # => "foo.bar"
|
35
35
|
url.path # => "/asdf.html?q=arg"
|
data/lib/domainatrix.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
module Domainatrix
|
2
2
|
class DomainParser
|
3
|
-
attr_reader :
|
3
|
+
attr_reader :public_suffixes
|
4
4
|
|
5
5
|
def initialize(file_name)
|
6
|
-
@
|
6
|
+
@public_suffixes = {}
|
7
7
|
read_dat_file(file_name)
|
8
8
|
end
|
9
9
|
|
@@ -13,7 +13,7 @@ module Domainatrix
|
|
13
13
|
unless (line =~ /\/\//) || line.empty?
|
14
14
|
parts = line.split(".").reverse
|
15
15
|
|
16
|
-
sub_hash = @
|
16
|
+
sub_hash = @public_suffixes
|
17
17
|
parts.each do |part|
|
18
18
|
sub_hash = (sub_hash[part] ||= {})
|
19
19
|
end
|
@@ -33,31 +33,31 @@ module Domainatrix
|
|
33
33
|
|
34
34
|
def parse_domains_from_host(host)
|
35
35
|
parts = host.split(".").reverse
|
36
|
-
|
36
|
+
public_suffix = []
|
37
37
|
domain = ""
|
38
38
|
subdomains = []
|
39
|
-
sub_hash = @
|
39
|
+
sub_hash = @public_suffixes
|
40
40
|
parts.each_index do |i|
|
41
41
|
part = parts[i]
|
42
42
|
|
43
43
|
sub_parts = sub_hash[part]
|
44
44
|
sub_hash = sub_parts
|
45
45
|
if sub_parts.has_key? "*"
|
46
|
-
|
47
|
-
|
46
|
+
public_suffix << part
|
47
|
+
public_suffix << parts[i+1]
|
48
48
|
domain = parts[i+2]
|
49
49
|
subdomains = parts.slice(i+3, parts.size)
|
50
50
|
break
|
51
51
|
elsif sub_parts.empty? || !sub_parts.has_key?(parts[i+1])
|
52
|
-
|
52
|
+
public_suffix << part
|
53
53
|
domain = parts[i+1]
|
54
54
|
subdomains = parts.slice(i+2, parts.size)
|
55
55
|
break
|
56
56
|
else
|
57
|
-
|
57
|
+
public_suffix << part
|
58
58
|
end
|
59
59
|
end
|
60
|
-
{:
|
60
|
+
{:public_suffix => public_suffix.reverse.join("."), :domain => domain, :subdomain => subdomains.reverse.join(".")}
|
61
61
|
end
|
62
62
|
end
|
63
63
|
end
|
data/lib/domainatrix/url.rb
CHANGED
@@ -1,18 +1,18 @@
|
|
1
1
|
module Domainatrix
|
2
2
|
class Url
|
3
|
-
attr_reader :
|
3
|
+
attr_reader :public_suffix, :domain, :subdomain, :path, :url
|
4
4
|
|
5
5
|
def initialize(attrs = {})
|
6
6
|
@url = attrs[:url]
|
7
|
-
@
|
7
|
+
@public_suffix = attrs[:public_suffix]
|
8
8
|
@domain = attrs[:domain]
|
9
9
|
@subdomain = attrs[:subdomain]
|
10
10
|
@path = attrs[:path]
|
11
11
|
end
|
12
12
|
|
13
13
|
def canonical(options = {})
|
14
|
-
|
15
|
-
url = "#{
|
14
|
+
public_suffix_parts = @public_suffix.split(".")
|
15
|
+
url = "#{public_suffix_parts.reverse.join(".")}.#{@domain}"
|
16
16
|
if @subdomain && !@subdomain.empty?
|
17
17
|
subdomain_parts = @subdomain.split(".")
|
18
18
|
url << ".#{subdomain_parts.reverse.join(".")}"
|
@@ -7,20 +7,20 @@ describe "domain parser" do
|
|
7
7
|
|
8
8
|
describe "reading the dat file" do
|
9
9
|
it "creates a tree of the domain names" do
|
10
|
-
@domain_parser.
|
10
|
+
@domain_parser.public_suffixes.should be_a Hash
|
11
11
|
end
|
12
12
|
|
13
13
|
it "creates the first level of the tree" do
|
14
|
-
@domain_parser.
|
14
|
+
@domain_parser.public_suffixes.should have_key("com")
|
15
15
|
end
|
16
16
|
|
17
17
|
it "creates the first level of the tree even when the first doesn't appear on a line by itself" do
|
18
|
-
@domain_parser.
|
18
|
+
@domain_parser.public_suffixes.should have_key("uk")
|
19
19
|
end
|
20
20
|
|
21
21
|
it "creates lower levels of the tree" do
|
22
|
-
@domain_parser.
|
23
|
-
@domain_parser.
|
22
|
+
@domain_parser.public_suffixes["jp"].should have_key("ac")
|
23
|
+
@domain_parser.public_suffixes["jp"]["aichi"].should have_key("*")
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
@@ -40,10 +40,10 @@ describe "domain parser" do
|
|
40
40
|
end
|
41
41
|
|
42
42
|
it "parses the tld" do
|
43
|
-
@domain_parser.parse("http://pauldix.net")[:
|
44
|
-
@domain_parser.parse("http://pauldix.co.uk")[:
|
45
|
-
@domain_parser.parse("http://pauldix.com.kg")[:
|
46
|
-
@domain_parser.parse("http://pauldix.com.aichi.jp")[:
|
43
|
+
@domain_parser.parse("http://pauldix.net")[:public_suffix].should == "net"
|
44
|
+
@domain_parser.parse("http://pauldix.co.uk")[:public_suffix].should == "co.uk"
|
45
|
+
@domain_parser.parse("http://pauldix.com.kg")[:public_suffix].should == "com.kg"
|
46
|
+
@domain_parser.parse("http://pauldix.com.aichi.jp")[:public_suffix].should == "com.aichi.jp"
|
47
47
|
end
|
48
48
|
|
49
49
|
it "should have the domain" do
|
@@ -5,8 +5,8 @@ describe "url" do
|
|
5
5
|
Domainatrix::Url.new(:url => "http://pauldix.net").url.should == "http://pauldix.net"
|
6
6
|
end
|
7
7
|
|
8
|
-
it "has the
|
9
|
-
Domainatrix::Url.new(:
|
8
|
+
it "has the public_suffix" do
|
9
|
+
Domainatrix::Url.new(:public_suffix => "net").public_suffix.should == "net"
|
10
10
|
end
|
11
11
|
|
12
12
|
it "has the domain" do
|
@@ -22,20 +22,20 @@ describe "url" do
|
|
22
22
|
end
|
23
23
|
|
24
24
|
it "canonicalizes the url" do
|
25
|
-
Domainatrix::Url.new(:domain => "pauldix", :
|
26
|
-
Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :
|
27
|
-
Domainatrix::Url.new(:subdomain => "foo.bar", :domain => "pauldix", :
|
28
|
-
Domainatrix::Url.new(:domain => "pauldix", :
|
29
|
-
Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :
|
30
|
-
Domainatrix::Url.new(:subdomain => "foo.bar", :domain => "pauldix", :
|
31
|
-
Domainatrix::Url.new(:subdomain => "", :domain => "pauldix", :
|
25
|
+
Domainatrix::Url.new(:domain => "pauldix", :public_suffix => "net").canonical.should == "net.pauldix"
|
26
|
+
Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :public_suffix => "net").canonical.should == "net.pauldix.foo"
|
27
|
+
Domainatrix::Url.new(:subdomain => "foo.bar", :domain => "pauldix", :public_suffix => "net").canonical.should == "net.pauldix.bar.foo"
|
28
|
+
Domainatrix::Url.new(:domain => "pauldix", :public_suffix => "co.uk").canonical.should == "uk.co.pauldix"
|
29
|
+
Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :public_suffix => "co.uk").canonical.should == "uk.co.pauldix.foo"
|
30
|
+
Domainatrix::Url.new(:subdomain => "foo.bar", :domain => "pauldix", :public_suffix => "co.uk").canonical.should == "uk.co.pauldix.bar.foo"
|
31
|
+
Domainatrix::Url.new(:subdomain => "", :domain => "pauldix", :public_suffix => "co.uk").canonical.should == "uk.co.pauldix"
|
32
32
|
end
|
33
33
|
|
34
34
|
it "canonicalizes the url with the path" do
|
35
|
-
Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :
|
35
|
+
Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :public_suffix => "net", :path => "/hello").canonical.should == "net.pauldix.foo/hello"
|
36
36
|
end
|
37
37
|
|
38
38
|
it "canonicalizes the url without the path" do
|
39
|
-
Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :
|
39
|
+
Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :public_suffix => "net").canonical(:include_path => false).should == "net.pauldix.foo"
|
40
40
|
end
|
41
41
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: domainatrix
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Paul Dix
|
@@ -59,6 +59,6 @@ rubyforge_project:
|
|
59
59
|
rubygems_version: 1.3.5
|
60
60
|
signing_key:
|
61
61
|
specification_version: 2
|
62
|
-
summary: A cruel mistress that uses the public suffix domain list to dominate URLs by canonicalizing, finding
|
62
|
+
summary: A cruel mistress that uses the public suffix domain list to dominate URLs by canonicalizing, finding the public suffix, and breaking them into their domain parts.
|
63
63
|
test_files: []
|
64
64
|
|