f1sherman-domainatrix 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.textile +64 -0
- data/lib/domainatrix/domain_parser.rb +77 -0
- data/lib/domainatrix/url.rb +33 -0
- data/lib/domainatrix.rb +14 -0
- data/lib/effective_tld_names.dat +5189 -0
- data/spec/domainatrix/domain_parser_spec.rb +71 -0
- data/spec/domainatrix/url_spec.rb +54 -0
- data/spec/domainatrix_spec.rb +16 -0
- data/spec/spec.opts +2 -0
- data/spec/spec_helper.rb +10 -0
- metadata +87 -0
@@ -0,0 +1,71 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
+
|
3
|
+
# Exercises Domainatrix::DomainParser against the bundled
# effective_tld_names.dat file: first the suffix tree it builds, then the
# individual parts it extracts from a URL.
describe "domain parser" do
  before(:all) do
    # A single parser is built for the whole group and shared through
    # @domain_parser.
    dat_path = "#{File.dirname(__FILE__)}/../../lib/effective_tld_names.dat"
    @domain_parser = Domainatrix::DomainParser.new(dat_path)
  end

  # The public suffix tree held by the shared parser.
  def suffix_tree
    @domain_parser.public_suffixes
  end

  # Runs +url+ through the shared parser and returns the result of #parse.
  def parsed(url)
    @domain_parser.parse(url)
  end

  # For every [url, expected] pair, in order, asserts that the parsed
  # result's +key+ entry equals +expected+.
  def expect_part(key, cases)
    cases.each do |url, expected|
      parsed(url)[key].should == expected
    end
  end

  describe "reading the dat file" do
    it "creates a tree of the domain names" do
      suffix_tree.should be_a(Hash)
    end

    it "creates the first level of the tree" do
      suffix_tree.should have_key("com")
    end

    it "creates the first level of the tree even when the first doesn't appear on a line by itself" do
      suffix_tree.should have_key("uk")
    end

    it "creates lower levels of the tree" do
      suffix_tree["jp"].should have_key("ac")
      suffix_tree["jp"]["aichi"].should have_key("*")
    end
  end

  describe "parsing" do
    it "returns a hash of parts" do
      parsed("http://pauldix.net").should be_a(Hash)
    end

    it "includes the original url" do
      expect_part(:url, [["http://www.pauldix.net", "http://www.pauldix.net"]])
    end

    it "includes the scheme" do
      expect_part(:scheme, [["http://www.pauldix.net", "http"]])
    end

    it "includes the full host" do
      expect_part(:host, [["http://www.pauldix.net", "www.pauldix.net"]])
    end

    it "parses out the path" do
      expect_part(:path, [
        ["http://pauldix.net/foo.html?asdf=foo", "/foo.html?asdf=foo"],
        ["http://pauldix.net?asdf=foo",          "?asdf=foo"],
        ["http://pauldix.net",                   ""]
      ])
    end

    it "parses the tld" do
      expect_part(:public_suffix, [
        ["http://pauldix.net",          "net"],
        ["http://pauldix.co.uk",        "co.uk"],
        ["http://pauldix.com.kg",       "com.kg"],
        ["http://pauldix.com.aichi.jp", "com.aichi.jp"]
      ])
    end

    it "should have the domain" do
      expect_part(:domain, [
        ["http://pauldix.net",          "pauldix"],
        ["http://foo.pauldix.net",      "pauldix"],
        ["http://pauldix.co.uk",        "pauldix"],
        ["http://foo.pauldix.co.uk",    "pauldix"],
        ["http://pauldix.com.kg",       "pauldix"],
        ["http://pauldix.com.aichi.jp", "pauldix"]
      ])
    end

    it "should have subdomains" do
      expect_part(:subdomain, [
        ["http://foo.pauldix.net",       "foo"],
        ["http://bar.foo.pauldix.co.uk", "bar.foo"]
      ])
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
+
|
3
|
+
# Exercises Domainatrix::Url: the plain attribute readers, the canonical
# (reversed-host) form, and joining the domain with its public suffix.
describe "url" do
  # Constructs a Url from the given attribute hash.
  def url_from(attrs)
    Domainatrix::Url.new(attrs)
  end

  # Attribute hashes paired with the expected "domain.public_suffix" string;
  # used for both the primary method and its alias.
  def joined_cases
    [
      [{ :domain => "pauldix", :public_suffix => "net" },                   "pauldix.net"],
      [{ :domain => "foo", :public_suffix => "co.uk" },                     "foo.co.uk"],
      [{ :subdomain => "baz", :domain => "bar", :public_suffix => "com" },  "bar.com"]
    ]
  end

  it "has the original url" do
    subject_url = url_from(:url => "http://pauldix.net")
    subject_url.url.should == "http://pauldix.net"
  end

  it "has the public_suffix" do
    subject_url = url_from(:public_suffix => "net")
    subject_url.public_suffix.should == "net"
  end

  it "has the domain" do
    subject_url = url_from(:domain => "pauldix")
    subject_url.domain.should == "pauldix"
  end

  it "has the subdomain" do
    subject_url = url_from(:subdomain => "foo")
    subject_url.subdomain.should == "foo"
  end

  it "has the path" do
    subject_url = url_from(:path => "/asdf.html")
    subject_url.path.should == "/asdf.html"
  end

  it "canonicalizes the url" do
    # Ordered pairs of constructor attributes and the expected canonical form.
    [
      [{ :domain => "pauldix", :public_suffix => "net" },                              "net.pauldix"],
      [{ :subdomain => "foo", :domain => "pauldix", :public_suffix => "net" },         "net.pauldix.foo"],
      [{ :subdomain => "foo.bar", :domain => "pauldix", :public_suffix => "net" },     "net.pauldix.bar.foo"],
      [{ :domain => "pauldix", :public_suffix => "co.uk" },                            "uk.co.pauldix"],
      [{ :subdomain => "foo", :domain => "pauldix", :public_suffix => "co.uk" },       "uk.co.pauldix.foo"],
      [{ :subdomain => "foo.bar", :domain => "pauldix", :public_suffix => "co.uk" },   "uk.co.pauldix.bar.foo"],
      [{ :subdomain => "", :domain => "pauldix", :public_suffix => "co.uk" },          "uk.co.pauldix"]
    ].each do |attrs, expected|
      url_from(attrs).canonical.should == expected
    end
  end

  it "canonicalizes the url with the path" do
    with_path = url_from(:subdomain => "foo", :domain => "pauldix", :public_suffix => "net", :path => "/hello")
    with_path.canonical.should == "net.pauldix.foo/hello"
  end

  it "canonicalizes the url without the path" do
    no_path = url_from(:subdomain => "foo", :domain => "pauldix", :public_suffix => "net")
    no_path.canonical(:include_path => false).should == "net.pauldix.foo"
  end

  it "combines the domain with the public_suffix" do
    joined_cases.each do |attrs, expected|
      url_from(attrs).domain_with_public_suffix.should == expected
    end
  end

  it "combines the domain with the public_suffix as an alias" do
    joined_cases.each do |attrs, expected|
      url_from(attrs).domain_with_tld.should == expected
    end
  end

end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
2
|
+
|
3
|
+
# Exercises the top-level Domainatrix.parse entry point.
describe "domainatrix" do
  it "should parse into a url object" do
    result = Domainatrix.parse("http://pauldix.net")
    result.should be_a(Domainatrix::Url)
  end

  it "should canonicalize" do
    # Ordered pairs of input URL and the canonical string expected for it.
    [
      ["http://pauldix.net",                      "net.pauldix"],
      ["http://pauldix.net/foo.html",             "net.pauldix/foo.html"],
      ["http://pauldix.net/foo.html?asdf=bar",    "net.pauldix/foo.html?asdf=bar"],
      ["http://foo.pauldix.net",                  "net.pauldix.foo"],
      ["http://foo.bar.pauldix.net",              "net.pauldix.bar.foo"],
      ["http://pauldix.co.uk",                    "uk.co.pauldix"]
    ].each do |input_url, canonical_form|
      Domainatrix.parse(input_url).canonical.should == canonical_form
    end
  end
end
|
data/spec/spec.opts
ADDED
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
require "rubygems"
|
2
|
+
require "rspec"
|
3
|
+
|
4
|
+
# Colored test output is optional: `gem install redgreen` to enable it.
# The require is skipped when TM_CURRENT_LINE is set, and a missing gem is
# ignored.
begin
  require "redgreen" unless ENV['TM_CURRENT_LINE']
rescue LoadError
  # redgreen is not installed; carry on without it.
end

# Put the project's lib directory at the front of the load path, once.
lib_dir = File.expand_path(File.dirname(__FILE__) + "/../lib/")
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

# Load the library under test.
require "#{File.dirname(__FILE__)}/../lib/domainatrix"
|
metadata
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: f1sherman-domainatrix
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 11
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 10
|
10
|
+
version: 0.0.10
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Paul Dix
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2009-12-10 00:00:00 Z
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: addressable
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
hash: 3
|
29
|
+
segments:
|
30
|
+
- 0
|
31
|
+
version: "0"
|
32
|
+
type: :runtime
|
33
|
+
version_requirements: *id001
|
34
|
+
description:
|
35
|
+
email: paul@pauldix.net
|
36
|
+
executables: []
|
37
|
+
|
38
|
+
extensions: []
|
39
|
+
|
40
|
+
extra_rdoc_files: []
|
41
|
+
|
42
|
+
files:
|
43
|
+
- lib/domainatrix.rb
|
44
|
+
- lib/effective_tld_names.dat
|
45
|
+
- lib/domainatrix/domain_parser.rb
|
46
|
+
- lib/domainatrix/url.rb
|
47
|
+
- README.textile
|
48
|
+
- spec/spec.opts
|
49
|
+
- spec/spec_helper.rb
|
50
|
+
- spec/domainatrix_spec.rb
|
51
|
+
- spec/domainatrix/domain_parser_spec.rb
|
52
|
+
- spec/domainatrix/url_spec.rb
|
53
|
+
homepage: http://github.com/pauldix/domainatrix
|
54
|
+
licenses: []
|
55
|
+
|
56
|
+
post_install_message:
|
57
|
+
rdoc_options: []
|
58
|
+
|
59
|
+
require_paths:
|
60
|
+
- lib
|
61
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
62
|
+
none: false
|
63
|
+
requirements:
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
hash: 3
|
67
|
+
segments:
|
68
|
+
- 0
|
69
|
+
version: "0"
|
70
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
71
|
+
none: false
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
hash: 3
|
76
|
+
segments:
|
77
|
+
- 0
|
78
|
+
version: "0"
|
79
|
+
requirements: []
|
80
|
+
|
81
|
+
rubyforge_project:
|
82
|
+
rubygems_version: 1.8.10
|
83
|
+
signing_key:
|
84
|
+
specification_version: 2
|
85
|
+
summary: A cruel mistress that uses the public suffix domain list to dominate URLs by canonicalizing, finding the public suffix, and breaking them into their domain parts.
|
86
|
+
test_files: []
|
87
|
+
|