f1sherman-domainatrix 0.0.10

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,71 @@
require File.dirname(__FILE__) + '/../spec_helper'

# Specs for Domainatrix::DomainParser: loading the public-suffix data file
# into a nested hash, and splitting URLs into their component parts.
describe "domain parser" do
  # The parser is built once for the whole group and shared by every example.
  before(:all) do
    dat_file = "#{File.dirname(__FILE__)}/../../lib/effective_tld_names.dat"
    @domain_parser = Domainatrix::DomainParser.new(dat_file)
  end

  # The suffix tree exposed by the shared parser.
  def suffix_tree
    @domain_parser.public_suffixes
  end

  # Parses +url+ with the shared parser and returns the resulting parts.
  def parts_of(url)
    @domain_parser.parse(url)
  end

  describe "reading the dat file" do
    it "creates a tree of the domain names" do
      suffix_tree.should be_a(Hash)
    end

    it "creates the first level of the tree" do
      suffix_tree.should have_key("com")
    end

    it "creates the first level of the tree even when the first doesn't appear on a line by itself" do
      suffix_tree.should have_key("uk")
    end

    it "creates lower levels of the tree" do
      japan = suffix_tree["jp"]
      japan.should have_key("ac")
      japan["aichi"].should have_key("*")
    end
  end

  describe "parsing" do
    it "returns a hash of parts" do
      parts_of("http://pauldix.net").should be_a(Hash)
    end

    it "includes the original url" do
      original = "http://www.pauldix.net"
      parts_of("http://www.pauldix.net")[:url].should == original
    end

    it "includes the scheme" do
      parts_of("http://www.pauldix.net")[:scheme].should == "http"
    end

    it "includes the full host" do
      parts_of("http://www.pauldix.net")[:host].should == "www.pauldix.net"
    end

    it "parses out the path" do
      # Each pair is [input url, expected :path]; the query string is
      # expected to be kept as part of the path.
      [
        ["http://pauldix.net/foo.html?asdf=foo", "/foo.html?asdf=foo"],
        ["http://pauldix.net?asdf=foo", "?asdf=foo"],
        ["http://pauldix.net", ""]
      ].each do |url, expected_path|
        parts_of(url)[:path].should == expected_path
      end
    end

    it "parses the tld" do
      # Each pair is [input url, expected :public_suffix].
      [
        ["http://pauldix.net", "net"],
        ["http://pauldix.co.uk", "co.uk"],
        ["http://pauldix.com.kg", "com.kg"],
        ["http://pauldix.com.aichi.jp", "com.aichi.jp"]
      ].each do |url, expected_suffix|
        parts_of(url)[:public_suffix].should == expected_suffix
      end
    end

    it "should have the domain" do
      # Every one of these urls is expected to yield the same :domain.
      [
        "http://pauldix.net",
        "http://foo.pauldix.net",
        "http://pauldix.co.uk",
        "http://foo.pauldix.co.uk",
        "http://pauldix.com.kg",
        "http://pauldix.com.aichi.jp"
      ].each do |url|
        parts_of(url)[:domain].should == "pauldix"
      end
    end

    it "should have subdomains" do
      # Each pair is [input url, expected :subdomain].
      [
        ["http://foo.pauldix.net", "foo"],
        ["http://bar.foo.pauldix.co.uk", "bar.foo"]
      ].each do |url, expected_subdomain|
        parts_of(url)[:subdomain].should == expected_subdomain
      end
    end
  end
end
@@ -0,0 +1,54 @@
require File.dirname(__FILE__) + '/../spec_helper'

# Specs for Domainatrix::Url: attribute readers, the canonical form, and
# joining the domain with its public suffix.
describe "url" do
  # Builds the object under test from a hash of url parts.
  def url_from(parts)
    Domainatrix::Url.new(parts)
  end

  # [constructor attributes, expected "domain.suffix"] pairs shared by the
  # domain_with_public_suffix example and the domain_with_tld example.
  def joined_domain_cases
    [
      [{ :domain => "pauldix", :public_suffix => "net" }, "pauldix.net"],
      [{ :domain => "foo", :public_suffix => "co.uk" }, "foo.co.uk"],
      [{ :subdomain => "baz", :domain => "bar", :public_suffix => "com" }, "bar.com"]
    ]
  end

  it "has the original url" do
    subject_url = url_from(:url => "http://pauldix.net")
    subject_url.url.should == "http://pauldix.net"
  end

  it "has the public_suffix" do
    subject_url = url_from(:public_suffix => "net")
    subject_url.public_suffix.should == "net"
  end

  it "has the domain" do
    subject_url = url_from(:domain => "pauldix")
    subject_url.domain.should == "pauldix"
  end

  it "has the subdomain" do
    subject_url = url_from(:subdomain => "foo")
    subject_url.subdomain.should == "foo"
  end

  it "has the path" do
    subject_url = url_from(:path => "/asdf.html")
    subject_url.path.should == "/asdf.html"
  end

  it "canonicalizes the url" do
    # Each pair is [constructor attributes, expected canonical string]. The
    # expected strings list the suffix, domain and subdomain labels in
    # reverse order; an empty subdomain contributes nothing.
    [
      [{ :domain => "pauldix", :public_suffix => "net" },
       "net.pauldix"],
      [{ :subdomain => "foo", :domain => "pauldix", :public_suffix => "net" },
       "net.pauldix.foo"],
      [{ :subdomain => "foo.bar", :domain => "pauldix", :public_suffix => "net" },
       "net.pauldix.bar.foo"],
      [{ :domain => "pauldix", :public_suffix => "co.uk" },
       "uk.co.pauldix"],
      [{ :subdomain => "foo", :domain => "pauldix", :public_suffix => "co.uk" },
       "uk.co.pauldix.foo"],
      [{ :subdomain => "foo.bar", :domain => "pauldix", :public_suffix => "co.uk" },
       "uk.co.pauldix.bar.foo"],
      [{ :subdomain => "", :domain => "pauldix", :public_suffix => "co.uk" },
       "uk.co.pauldix"]
    ].each do |parts, expected_canonical|
      url_from(parts).canonical.should == expected_canonical
    end
  end

  it "canonicalizes the url with the path" do
    parts = { :subdomain => "foo", :domain => "pauldix", :public_suffix => "net", :path => "/hello" }
    url_from(parts).canonical.should == "net.pauldix.foo/hello"
  end

  it "canonicalizes the url without the path" do
    # NOTE(review): no :path is supplied here, so this example cannot show
    # that :include_path => false actually drops a path -- consider adding
    # one once the library behaviour has been confirmed.
    parts = { :subdomain => "foo", :domain => "pauldix", :public_suffix => "net" }
    url_from(parts).canonical(:include_path => false).should == "net.pauldix.foo"
  end

  it "combines the domain with the public_suffix" do
    joined_domain_cases.each do |parts, expected_joined|
      url_from(parts).domain_with_public_suffix.should == expected_joined
    end
  end

  it "combines the domain with the public_suffix as an alias" do
    joined_domain_cases.each do |parts, expected_joined|
      url_from(parts).domain_with_tld.should == expected_joined
    end
  end

end
@@ -0,0 +1,16 @@
require File.dirname(__FILE__) + '/spec_helper'

# Specs for the top-level Domainatrix.parse entry point.
describe "domainatrix" do
  it "should parse into a url object" do
    parsed = Domainatrix.parse("http://pauldix.net")
    parsed.should be_a(Domainatrix::Url)
  end

  it "should canonicalize" do
    # Each pair is [input url, expected canonical string].
    [
      ["http://pauldix.net", "net.pauldix"],
      ["http://pauldix.net/foo.html", "net.pauldix/foo.html"],
      ["http://pauldix.net/foo.html?asdf=bar", "net.pauldix/foo.html?asdf=bar"],
      ["http://foo.pauldix.net", "net.pauldix.foo"],
      ["http://foo.bar.pauldix.net", "net.pauldix.bar.foo"],
      ["http://pauldix.co.uk", "uk.co.pauldix"]
    ].each do |url, expected_canonical|
      Domainatrix.parse(url).canonical.should == expected_canonical
    end
  end
end
data/spec/spec.opts ADDED
@@ -0,0 +1,2 @@
1
+ --diff
2
+ --color
@@ -0,0 +1,10 @@
require "rubygems"
require "rspec"

# gem install redgreen for colored test output
# redgreen is optional: it is skipped when TM_CURRENT_LINE is set
# (presumably a TextMate session -- confirm), and a missing gem is ignored.
unless ENV['TM_CURRENT_LINE']
  begin
    require "redgreen"
  rescue LoadError
    # Not installed; carry on with plain output.
  end
end

# Put the gem's lib directory at the front of the load path, once.
lib_dir = File.expand_path(File.dirname(__FILE__) + "/../lib/")
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

# Load the library under test.
require "#{File.dirname(__FILE__)}/../lib/domainatrix"
metadata ADDED
@@ -0,0 +1,87 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: f1sherman-domainatrix
3
+ version: !ruby/object:Gem::Version
4
+ hash: 11
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 10
10
+ version: 0.0.10
11
+ platform: ruby
12
+ authors:
13
+ - Paul Dix
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2009-12-10 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: addressable
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 3
29
+ segments:
30
+ - 0
31
+ version: "0"
32
+ type: :runtime
33
+ version_requirements: *id001
34
+ description:
35
+ email: paul@pauldix.net
36
+ executables: []
37
+
38
+ extensions: []
39
+
40
+ extra_rdoc_files: []
41
+
42
+ files:
43
+ - lib/domainatrix.rb
44
+ - lib/effective_tld_names.dat
45
+ - lib/domainatrix/domain_parser.rb
46
+ - lib/domainatrix/url.rb
47
+ - README.textile
48
+ - spec/spec.opts
49
+ - spec/spec_helper.rb
50
+ - spec/domainatrix_spec.rb
51
+ - spec/domainatrix/domain_parser_spec.rb
52
+ - spec/domainatrix/url_spec.rb
53
+ homepage: http://github.com/pauldix/domainatrix
54
+ licenses: []
55
+
56
+ post_install_message:
57
+ rdoc_options: []
58
+
59
+ require_paths:
60
+ - lib
61
+ required_ruby_version: !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ hash: 3
67
+ segments:
68
+ - 0
69
+ version: "0"
70
+ required_rubygems_version: !ruby/object:Gem::Requirement
71
+ none: false
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ hash: 3
76
+ segments:
77
+ - 0
78
+ version: "0"
79
+ requirements: []
80
+
81
+ rubyforge_project:
82
+ rubygems_version: 1.8.10
83
+ signing_key:
84
+ specification_version: 2
85
+ summary: A cruel mistress that uses the public suffix domain list to dominate URLs by canonicalizing, finding the public suffix, and breaking them into their domain parts.
86
+ test_files: []
87
+