domainatrix 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- data/README.textile +4 -4
- data/lib/domainatrix.rb +1 -1
- data/lib/domainatrix/domain_parser.rb +10 -10
- data/lib/domainatrix/url.rb +4 -4
- data/spec/domainatrix/domain_parser_spec.rb +9 -9
- data/spec/domainatrix/url_spec.rb +11 -11
- metadata +2 -2
data/README.textile
CHANGED
@@ -4,11 +4,11 @@ h1. Domainatrix
|
|
4
4
|
|
5
5
|
h2. Summary
|
6
6
|
|
7
|
-
A cruel mistress that uses the public suffix domain list to dominate URLs by canonicalizing, finding
|
7
|
+
A cruel mistress that uses the public suffix domain list to dominate URLs by canonicalizing, finding public suffixes, and breaking them into their domain parts.
|
8
8
|
|
9
9
|
h2. Description
|
10
10
|
|
11
|
-
This simple library can parse a URL into its canonical form. It uses the list of domains from "http://publicsuffix.org":http://publicsuffix.org to break the domain into its
|
11
|
+
This simple library can parse a URL into its canonical form. It uses the list of domains from "http://publicsuffix.org":http://publicsuffix.org to break the domain into its public suffix, domain, and subdomain.
|
12
12
|
|
13
13
|
h2. Installation
|
14
14
|
|
@@ -24,12 +24,12 @@ require 'domainatrix'
|
|
24
24
|
|
25
25
|
url = Domainatrix.parse("http://www.pauldix.net")
|
26
26
|
url.url # => "http://www.pauldix.net" (the original url)
|
27
|
-
url.
|
27
|
+
url.public_suffix # => "net"
|
28
28
|
url.domain # => "pauldix"
|
29
29
|
url.canonical # => "net.pauldix"
|
30
30
|
|
31
31
|
url = Domainatrix.parse("http://foo.bar.pauldix.co.uk/asdf.html?q=arg")
|
32
|
-
url.
|
32
|
+
url.public_suffix # => "co.uk"
|
33
33
|
url.domain # => "pauldix"
|
34
34
|
url.subdomain # => "foo.bar"
|
35
35
|
url.path # => "/asdf.html?q=arg"
|
data/lib/domainatrix.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
module Domainatrix
|
2
2
|
class DomainParser
|
3
|
-
attr_reader :
|
3
|
+
attr_reader :public_suffixes
|
4
4
|
|
5
5
|
def initialize(file_name)
|
6
|
-
@
|
6
|
+
@public_suffixes = {}
|
7
7
|
read_dat_file(file_name)
|
8
8
|
end
|
9
9
|
|
@@ -13,7 +13,7 @@ module Domainatrix
|
|
13
13
|
unless (line =~ /\/\//) || line.empty?
|
14
14
|
parts = line.split(".").reverse
|
15
15
|
|
16
|
-
sub_hash = @
|
16
|
+
sub_hash = @public_suffixes
|
17
17
|
parts.each do |part|
|
18
18
|
sub_hash = (sub_hash[part] ||= {})
|
19
19
|
end
|
@@ -33,31 +33,31 @@ module Domainatrix
|
|
33
33
|
|
34
34
|
def parse_domains_from_host(host)
|
35
35
|
parts = host.split(".").reverse
|
36
|
-
|
36
|
+
public_suffix = []
|
37
37
|
domain = ""
|
38
38
|
subdomains = []
|
39
|
-
sub_hash = @
|
39
|
+
sub_hash = @public_suffixes
|
40
40
|
parts.each_index do |i|
|
41
41
|
part = parts[i]
|
42
42
|
|
43
43
|
sub_parts = sub_hash[part]
|
44
44
|
sub_hash = sub_parts
|
45
45
|
if sub_parts.has_key? "*"
|
46
|
-
|
47
|
-
|
46
|
+
public_suffix << part
|
47
|
+
public_suffix << parts[i+1]
|
48
48
|
domain = parts[i+2]
|
49
49
|
subdomains = parts.slice(i+3, parts.size)
|
50
50
|
break
|
51
51
|
elsif sub_parts.empty? || !sub_parts.has_key?(parts[i+1])
|
52
|
-
|
52
|
+
public_suffix << part
|
53
53
|
domain = parts[i+1]
|
54
54
|
subdomains = parts.slice(i+2, parts.size)
|
55
55
|
break
|
56
56
|
else
|
57
|
-
|
57
|
+
public_suffix << part
|
58
58
|
end
|
59
59
|
end
|
60
|
-
{:
|
60
|
+
{:public_suffix => public_suffix.reverse.join("."), :domain => domain, :subdomain => subdomains.reverse.join(".")}
|
61
61
|
end
|
62
62
|
end
|
63
63
|
end
|
data/lib/domainatrix/url.rb
CHANGED
@@ -1,18 +1,18 @@
|
|
1
1
|
module Domainatrix
|
2
2
|
class Url
|
3
|
-
attr_reader :
|
3
|
+
attr_reader :public_suffix, :domain, :subdomain, :path, :url
|
4
4
|
|
5
5
|
def initialize(attrs = {})
|
6
6
|
@url = attrs[:url]
|
7
|
-
@
|
7
|
+
@public_suffix = attrs[:public_suffix]
|
8
8
|
@domain = attrs[:domain]
|
9
9
|
@subdomain = attrs[:subdomain]
|
10
10
|
@path = attrs[:path]
|
11
11
|
end
|
12
12
|
|
13
13
|
def canonical(options = {})
|
14
|
-
|
15
|
-
url = "#{
|
14
|
+
public_suffix_parts = @public_suffix.split(".")
|
15
|
+
url = "#{public_suffix_parts.reverse.join(".")}.#{@domain}"
|
16
16
|
if @subdomain && !@subdomain.empty?
|
17
17
|
subdomain_parts = @subdomain.split(".")
|
18
18
|
url << ".#{subdomain_parts.reverse.join(".")}"
|
@@ -7,20 +7,20 @@ describe "domain parser" do
|
|
7
7
|
|
8
8
|
describe "reading the dat file" do
|
9
9
|
it "creates a tree of the domain names" do
|
10
|
-
@domain_parser.
|
10
|
+
@domain_parser.public_suffixes.should be_a Hash
|
11
11
|
end
|
12
12
|
|
13
13
|
it "creates the first level of the tree" do
|
14
|
-
@domain_parser.
|
14
|
+
@domain_parser.public_suffixes.should have_key("com")
|
15
15
|
end
|
16
16
|
|
17
17
|
it "creates the first level of the tree even when the first doesn't appear on a line by itself" do
|
18
|
-
@domain_parser.
|
18
|
+
@domain_parser.public_suffixes.should have_key("uk")
|
19
19
|
end
|
20
20
|
|
21
21
|
it "creates lower levels of the tree" do
|
22
|
-
@domain_parser.
|
23
|
-
@domain_parser.
|
22
|
+
@domain_parser.public_suffixes["jp"].should have_key("ac")
|
23
|
+
@domain_parser.public_suffixes["jp"]["aichi"].should have_key("*")
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
@@ -40,10 +40,10 @@ describe "domain parser" do
|
|
40
40
|
end
|
41
41
|
|
42
42
|
it "parses the tld" do
|
43
|
-
@domain_parser.parse("http://pauldix.net")[:
|
44
|
-
@domain_parser.parse("http://pauldix.co.uk")[:
|
45
|
-
@domain_parser.parse("http://pauldix.com.kg")[:
|
46
|
-
@domain_parser.parse("http://pauldix.com.aichi.jp")[:
|
43
|
+
@domain_parser.parse("http://pauldix.net")[:public_suffix].should == "net"
|
44
|
+
@domain_parser.parse("http://pauldix.co.uk")[:public_suffix].should == "co.uk"
|
45
|
+
@domain_parser.parse("http://pauldix.com.kg")[:public_suffix].should == "com.kg"
|
46
|
+
@domain_parser.parse("http://pauldix.com.aichi.jp")[:public_suffix].should == "com.aichi.jp"
|
47
47
|
end
|
48
48
|
|
49
49
|
it "should have the domain" do
|
@@ -5,8 +5,8 @@ describe "url" do
|
|
5
5
|
Domainatrix::Url.new(:url => "http://pauldix.net").url.should == "http://pauldix.net"
|
6
6
|
end
|
7
7
|
|
8
|
-
it "has the
|
9
|
-
Domainatrix::Url.new(:
|
8
|
+
it "has the public_suffix" do
|
9
|
+
Domainatrix::Url.new(:public_suffix => "net").public_suffix.should == "net"
|
10
10
|
end
|
11
11
|
|
12
12
|
it "has the domain" do
|
@@ -22,20 +22,20 @@ describe "url" do
|
|
22
22
|
end
|
23
23
|
|
24
24
|
it "canonicalizes the url" do
|
25
|
-
Domainatrix::Url.new(:domain => "pauldix", :
|
26
|
-
Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :
|
27
|
-
Domainatrix::Url.new(:subdomain => "foo.bar", :domain => "pauldix", :
|
28
|
-
Domainatrix::Url.new(:domain => "pauldix", :
|
29
|
-
Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :
|
30
|
-
Domainatrix::Url.new(:subdomain => "foo.bar", :domain => "pauldix", :
|
31
|
-
Domainatrix::Url.new(:subdomain => "", :domain => "pauldix", :
|
25
|
+
Domainatrix::Url.new(:domain => "pauldix", :public_suffix => "net").canonical.should == "net.pauldix"
|
26
|
+
Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :public_suffix => "net").canonical.should == "net.pauldix.foo"
|
27
|
+
Domainatrix::Url.new(:subdomain => "foo.bar", :domain => "pauldix", :public_suffix => "net").canonical.should == "net.pauldix.bar.foo"
|
28
|
+
Domainatrix::Url.new(:domain => "pauldix", :public_suffix => "co.uk").canonical.should == "uk.co.pauldix"
|
29
|
+
Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :public_suffix => "co.uk").canonical.should == "uk.co.pauldix.foo"
|
30
|
+
Domainatrix::Url.new(:subdomain => "foo.bar", :domain => "pauldix", :public_suffix => "co.uk").canonical.should == "uk.co.pauldix.bar.foo"
|
31
|
+
Domainatrix::Url.new(:subdomain => "", :domain => "pauldix", :public_suffix => "co.uk").canonical.should == "uk.co.pauldix"
|
32
32
|
end
|
33
33
|
|
34
34
|
it "canonicalizes the url with the path" do
|
35
|
-
Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :
|
35
|
+
Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :public_suffix => "net", :path => "/hello").canonical.should == "net.pauldix.foo/hello"
|
36
36
|
end
|
37
37
|
|
38
38
|
it "canonicalizes the url without the path" do
|
39
|
-
Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :
|
39
|
+
Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :public_suffix => "net").canonical(:include_path => false).should == "net.pauldix.foo"
|
40
40
|
end
|
41
41
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: domainatrix
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Paul Dix
|
@@ -59,6 +59,6 @@ rubyforge_project:
|
|
59
59
|
rubygems_version: 1.3.5
|
60
60
|
signing_key:
|
61
61
|
specification_version: 2
|
62
|
-
summary: A cruel mistress that uses the public suffix domain list to dominate URLs by canonicalizing, finding
|
62
|
+
summary: A cruel mistress that uses the public suffix domain list to dominate URLs by canonicalizing, finding the public suffix, and breaking them into their domain parts.
|
63
63
|
test_files: []
|
64
64
|
|