domainatrix 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.textile +63 -0
- data/lib/domainatrix.rb +13 -0
- data/lib/domainatrix/domain_parser.rb +63 -0
- data/lib/domainatrix/url.rb +24 -0
- data/lib/effective_tld_names.dat +4362 -0
- data/spec/domainatrix/domain_parser_spec.rb +63 -0
- data/spec/domainatrix/url_spec.rb +37 -0
- data/spec/domainatrix_spec.rb +16 -0
- data/spec/spec.opts +2 -0
- data/spec/spec_helper.rb +10 -0
- metadata +64 -0
@@ -0,0 +1,63 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
+
|
3
|
+
describe "domain parser" do
|
4
|
+
before(:all) do
|
5
|
+
@domain_parser = Domainatrix::DomainParser.new("#{File.dirname(__FILE__)}/../../lib/effective_tld_names.dat")
|
6
|
+
end
|
7
|
+
|
8
|
+
describe "reading the dat file" do
|
9
|
+
it "creates a tree of the domain names" do
|
10
|
+
@domain_parser.tlds.should be_a Hash
|
11
|
+
end
|
12
|
+
|
13
|
+
it "creates the first level of the tree" do
|
14
|
+
@domain_parser.tlds.should have_key("com")
|
15
|
+
end
|
16
|
+
|
17
|
+
it "creates the first level of the tree even when the first doesn't appear on a line by itself" do
|
18
|
+
@domain_parser.tlds.should have_key("uk")
|
19
|
+
end
|
20
|
+
|
21
|
+
it "creates lower levels of the tree" do
|
22
|
+
@domain_parser.tlds["jp"].should have_key("ac")
|
23
|
+
@domain_parser.tlds["jp"]["aichi"].should have_key("*")
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
describe "parsing" do
|
28
|
+
it "returns a hash of parts" do
|
29
|
+
@domain_parser.parse("http://pauldix.net").should be_a Hash
|
30
|
+
end
|
31
|
+
|
32
|
+
it "should strip the http://" do
|
33
|
+
@domain_parser.parse("http://pauldix.net").values.each {|val| (val =~ /http\:\/\//).should_not be}
|
34
|
+
end
|
35
|
+
|
36
|
+
it "parses out the path" do
|
37
|
+
@domain_parser.parse("http://pauldix.net/foo.html?asdf=foo")[:path].should == "/foo.html?asdf=foo"
|
38
|
+
@domain_parser.parse("http://pauldix.net?asdf=foo")[:path].should == "?asdf=foo"
|
39
|
+
@domain_parser.parse("http://pauldix.net")[:path].should == ""
|
40
|
+
end
|
41
|
+
|
42
|
+
it "parses the tld" do
|
43
|
+
@domain_parser.parse("http://pauldix.net")[:tld].should == "net"
|
44
|
+
@domain_parser.parse("http://pauldix.co.uk")[:tld].should == "co.uk"
|
45
|
+
@domain_parser.parse("http://pauldix.com.kg")[:tld].should == "com.kg"
|
46
|
+
@domain_parser.parse("http://pauldix.com.aichi.jp")[:tld].should == "com.aichi.jp"
|
47
|
+
end
|
48
|
+
|
49
|
+
it "should have the domain" do
|
50
|
+
@domain_parser.parse("http://pauldix.net")[:domain].should == "pauldix"
|
51
|
+
@domain_parser.parse("http://foo.pauldix.net")[:domain].should == "pauldix"
|
52
|
+
@domain_parser.parse("http://pauldix.co.uk")[:domain].should == "pauldix"
|
53
|
+
@domain_parser.parse("http://foo.pauldix.co.uk")[:domain].should == "pauldix"
|
54
|
+
@domain_parser.parse("http://pauldix.com.kg")[:domain].should == "pauldix"
|
55
|
+
@domain_parser.parse("http://pauldix.com.aichi.jp")[:domain].should == "pauldix"
|
56
|
+
end
|
57
|
+
|
58
|
+
it "should have subdomains" do
|
59
|
+
@domain_parser.parse("http://foo.pauldix.net")[:subdomain].should == "foo"
|
60
|
+
@domain_parser.parse("http://bar.foo.pauldix.co.uk")[:subdomain].should == "bar.foo"
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
+
|
3
|
+
describe "url" do
|
4
|
+
it "has the tld" do
|
5
|
+
Domainatrix::Url.new(:tld => "net").tld.should == "net"
|
6
|
+
end
|
7
|
+
|
8
|
+
it "has the domain" do
|
9
|
+
Domainatrix::Url.new(:domain => "pauldix").domain.should == "pauldix"
|
10
|
+
end
|
11
|
+
|
12
|
+
it "has the subdomain" do
|
13
|
+
Domainatrix::Url.new(:subdomain => "foo").subdomain.should == "foo"
|
14
|
+
end
|
15
|
+
|
16
|
+
it "has the path" do
|
17
|
+
Domainatrix::Url.new(:path => "/asdf.html").path.should == "/asdf.html"
|
18
|
+
end
|
19
|
+
|
20
|
+
it "canonicalizes the url" do
|
21
|
+
Domainatrix::Url.new(:domain => "pauldix", :tld => "net").canonical.should == "net.pauldix"
|
22
|
+
Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :tld => "net").canonical.should == "net.pauldix.foo"
|
23
|
+
Domainatrix::Url.new(:subdomain => "foo.bar", :domain => "pauldix", :tld => "net").canonical.should == "net.pauldix.bar.foo"
|
24
|
+
Domainatrix::Url.new(:domain => "pauldix", :tld => "co.uk").canonical.should == "uk.co.pauldix"
|
25
|
+
Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :tld => "co.uk").canonical.should == "uk.co.pauldix.foo"
|
26
|
+
Domainatrix::Url.new(:subdomain => "foo.bar", :domain => "pauldix", :tld => "co.uk").canonical.should == "uk.co.pauldix.bar.foo"
|
27
|
+
Domainatrix::Url.new(:subdomain => "", :domain => "pauldix", :tld => "co.uk").canonical.should == "uk.co.pauldix"
|
28
|
+
end
|
29
|
+
|
30
|
+
it "canonicalizes the url with the path" do
|
31
|
+
Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :tld => "net", :path => "/hello").canonical.should == "net.pauldix.foo/hello"
|
32
|
+
end
|
33
|
+
|
34
|
+
it "canonicalizes the url without the path" do
|
35
|
+
Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :tld => "net").canonical(:include_path => false).should == "net.pauldix.foo"
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
2
|
+
|
3
|
+
describe "domainatrix" do
|
4
|
+
it "should parse into a url object" do
|
5
|
+
Domainatrix.parse("http://pauldix.net").should be_a Domainatrix::Url
|
6
|
+
end
|
7
|
+
|
8
|
+
it "should canonicalize" do
|
9
|
+
Domainatrix.parse("http://pauldix.net").canonical.should == "net.pauldix"
|
10
|
+
Domainatrix.parse("http://pauldix.net/foo.html").canonical.should == "net.pauldix/foo.html"
|
11
|
+
Domainatrix.parse("http://pauldix.net/foo.html?asdf=bar").canonical.should == "net.pauldix/foo.html?asdf=bar"
|
12
|
+
Domainatrix.parse("http://foo.pauldix.net").canonical.should == "net.pauldix.foo"
|
13
|
+
Domainatrix.parse("http://foo.bar.pauldix.net").canonical.should == "net.pauldix.bar.foo"
|
14
|
+
Domainatrix.parse("http://pauldix.co.uk").canonical.should == "uk.co.pauldix"
|
15
|
+
end
|
16
|
+
end
|
data/spec/spec.opts
ADDED
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
require "rubygems"
|
2
|
+
require "spec"
|
3
|
+
|
4
|
+
# gem install redgreen for colored test output
|
5
|
+
begin require "redgreen" unless ENV['TM_CURRENT_LINE']; rescue LoadError; end
|
6
|
+
|
7
|
+
path = File.expand_path(File.dirname(__FILE__) + "/../lib/")
|
8
|
+
$LOAD_PATH.unshift(path) unless $LOAD_PATH.include?(path)
|
9
|
+
|
10
|
+
require "lib/domainatrix"
|
metadata
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: domainatrix
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Paul Dix
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-12-10 00:00:00 -05:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description:
|
17
|
+
email: paul@pauldix.net
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files: []
|
23
|
+
|
24
|
+
files:
|
25
|
+
- lib/domainatrix.rb
|
26
|
+
- lib/effective_tld_names.dat
|
27
|
+
- lib/domainatrix/domain_parser.rb
|
28
|
+
- lib/domainatrix/url.rb
|
29
|
+
- README.textile
|
30
|
+
- spec/spec.opts
|
31
|
+
- spec/spec_helper.rb
|
32
|
+
- spec/domainatrix_spec.rb
|
33
|
+
- spec/domainatrix/domain_parser_spec.rb
|
34
|
+
- spec/domainatrix/url_spec.rb
|
35
|
+
has_rdoc: true
|
36
|
+
homepage: http://github.com/pauldix/domainatrix
|
37
|
+
licenses: []
|
38
|
+
|
39
|
+
post_install_message:
|
40
|
+
rdoc_options: []
|
41
|
+
|
42
|
+
require_paths:
|
43
|
+
- lib
|
44
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: "0"
|
49
|
+
version:
|
50
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: "0"
|
55
|
+
version:
|
56
|
+
requirements: []
|
57
|
+
|
58
|
+
rubyforge_project:
|
59
|
+
rubygems_version: 1.3.5
|
60
|
+
signing_key:
|
61
|
+
specification_version: 2
|
62
|
+
summary: A cruel mistress that uses the public suffix domain list to dominate URLs by canonicalizing, finding TLDs, and breaking them into their domain parts.
|
63
|
+
test_files: []
|
64
|
+
|