f1sherman-domainatrix 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,71 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper'
2
+
3
+ describe "domain parser" do # Specs for Domainatrix::DomainParser, loaded from lib/effective_tld_names.dat
4
+ before(:all) do # one parser is built before the examples and shared through @domain_parser
5
+ @domain_parser = Domainatrix::DomainParser.new("#{File.dirname(__FILE__)}/../../lib/effective_tld_names.dat")
6
+ end
7
+
8
+ describe "reading the dat file" do # public_suffixes is expected to be a nested Hash keyed by label, rightmost label first
9
+ it "creates a tree of the domain names" do
10
+ @domain_parser.public_suffixes.should be_a Hash
11
+ end
12
+
13
+ it "creates the first level of the tree" do
14
+ @domain_parser.public_suffixes.should have_key("com")
15
+ end
16
+
17
+ it "creates the first level of the tree even when the first doesn't appear on a line by itself" do
18
+ @domain_parser.public_suffixes.should have_key("uk")
19
+ end
20
+
21
+ it "creates lower levels of the tree" do
22
+ @domain_parser.public_suffixes["jp"].should have_key("ac")
23
+ @domain_parser.public_suffixes["jp"]["aichi"].should have_key("*") # NOTE(review): the * key presumably mirrors a wildcard rule in the dat file; confirm
24
+ end
25
+ end
26
+
27
+ describe "parsing" do # parse(url) is expected to return a Hash with :url, :scheme, :host, :path, :public_suffix, :domain and :subdomain
28
+ it "returns a hash of parts" do
29
+ @domain_parser.parse("http://pauldix.net").should be_a Hash
30
+ end
31
+
32
+ it "includes the original url" do
33
+ @domain_parser.parse("http://www.pauldix.net")[:url].should == "http://www.pauldix.net"
34
+ end
35
+
36
+ it "includes the scheme" do
37
+ @domain_parser.parse("http://www.pauldix.net")[:scheme].should == "http"
38
+ end
39
+
40
+ it "includes the full host" do
41
+ @domain_parser.parse("http://www.pauldix.net")[:host].should == "www.pauldix.net"
42
+ end
43
+
44
+ it "parses out the path" do # the query string is expected to be part of :path
45
+ @domain_parser.parse("http://pauldix.net/foo.html?asdf=foo")[:path].should == "/foo.html?asdf=foo"
46
+ @domain_parser.parse("http://pauldix.net?asdf=foo")[:path].should == "?asdf=foo" # a query string with no path is still expected under :path
47
+ @domain_parser.parse("http://pauldix.net")[:path].should == ""
48
+ end
49
+
50
+ it "parses the tld" do # covers one-, two- and three-label public suffixes
51
+ @domain_parser.parse("http://pauldix.net")[:public_suffix].should == "net"
52
+ @domain_parser.parse("http://pauldix.co.uk")[:public_suffix].should == "co.uk"
53
+ @domain_parser.parse("http://pauldix.com.kg")[:public_suffix].should == "com.kg"
54
+ @domain_parser.parse("http://pauldix.com.aichi.jp")[:public_suffix].should == "com.aichi.jp"
55
+ end
56
+
57
+ it "should have the domain" do # :domain is the single label directly left of the public suffix
58
+ @domain_parser.parse("http://pauldix.net")[:domain].should == "pauldix"
59
+ @domain_parser.parse("http://foo.pauldix.net")[:domain].should == "pauldix"
60
+ @domain_parser.parse("http://pauldix.co.uk")[:domain].should == "pauldix"
61
+ @domain_parser.parse("http://foo.pauldix.co.uk")[:domain].should == "pauldix"
62
+ @domain_parser.parse("http://pauldix.com.kg")[:domain].should == "pauldix"
63
+ @domain_parser.parse("http://pauldix.com.aichi.jp")[:domain].should == "pauldix"
64
+ end
65
+
66
+ it "should have subdomains" do # :subdomain keeps every label left of the domain, in host order
67
+ @domain_parser.parse("http://foo.pauldix.net")[:subdomain].should == "foo"
68
+ @domain_parser.parse("http://bar.foo.pauldix.co.uk")[:subdomain].should == "bar.foo"
69
+ end
70
+ end
71
+ end
@@ -0,0 +1,54 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper'
2
+
3
+ describe "url" do # Specs for Domainatrix::Url, which is constructed from a hash of URL parts
4
+ it "has the original url" do
5
+ Domainatrix::Url.new(:url => "http://pauldix.net").url.should == "http://pauldix.net"
6
+ end
7
+
8
+ it "has the public_suffix" do
9
+ Domainatrix::Url.new(:public_suffix => "net").public_suffix.should == "net"
10
+ end
11
+
12
+ it "has the domain" do
13
+ Domainatrix::Url.new(:domain => "pauldix").domain.should == "pauldix"
14
+ end
15
+
16
+ it "has the subdomain" do
17
+ Domainatrix::Url.new(:subdomain => "foo").subdomain.should == "foo"
18
+ end
19
+
20
+ it "has the path" do
21
+ Domainatrix::Url.new(:path => "/asdf.html").path.should == "/asdf.html"
22
+ end
23
+
24
+ it "canonicalizes the url" do # expected canonical form: host labels in reverse order (public suffix, domain, subdomain) joined with dots
25
+ Domainatrix::Url.new(:domain => "pauldix", :public_suffix => "net").canonical.should == "net.pauldix"
26
+ Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :public_suffix => "net").canonical.should == "net.pauldix.foo"
27
+ Domainatrix::Url.new(:subdomain => "foo.bar", :domain => "pauldix", :public_suffix => "net").canonical.should == "net.pauldix.bar.foo"
28
+ Domainatrix::Url.new(:domain => "pauldix", :public_suffix => "co.uk").canonical.should == "uk.co.pauldix"
29
+ Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :public_suffix => "co.uk").canonical.should == "uk.co.pauldix.foo"
30
+ Domainatrix::Url.new(:subdomain => "foo.bar", :domain => "pauldix", :public_suffix => "co.uk").canonical.should == "uk.co.pauldix.bar.foo"
31
+ Domainatrix::Url.new(:subdomain => "", :domain => "pauldix", :public_suffix => "co.uk").canonical.should == "uk.co.pauldix" # an empty subdomain must not leave a trailing dot
32
+ end
33
+
34
+ it "canonicalizes the url with the path" do # with no options, the path is expected to be appended as-is
35
+ Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :public_suffix => "net", :path => "/hello").canonical.should == "net.pauldix.foo/hello"
36
+ end
37
+
38
+ it "canonicalizes the url without the path" do # NOTE(review): no :path is supplied below, so this example cannot show a path actually being dropped
39
+ Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :public_suffix => "net").canonical(:include_path => false).should == "net.pauldix.foo"
40
+ end
41
+
42
+ it "combines the domain with the public_suffix" do # the subdomain is expected to be left out of the result
43
+ Domainatrix::Url.new(:domain => "pauldix", :public_suffix => "net").domain_with_public_suffix.should == "pauldix.net"
44
+ Domainatrix::Url.new(:domain => "foo", :public_suffix => "co.uk" ).domain_with_public_suffix.should == "foo.co.uk"
45
+ Domainatrix::Url.new(:subdomain => "baz", :domain => "bar", :public_suffix => "com").domain_with_public_suffix.should == "bar.com"
46
+ end
47
+
48
+ it "combines the domain with the public_suffix as an alias" do # same inputs and expected values as the domain_with_public_suffix example
49
+ Domainatrix::Url.new(:domain => "pauldix", :public_suffix => "net").domain_with_tld.should == "pauldix.net"
50
+ Domainatrix::Url.new(:domain => "foo", :public_suffix => "co.uk" ).domain_with_tld.should == "foo.co.uk"
51
+ Domainatrix::Url.new(:subdomain => "baz", :domain => "bar", :public_suffix => "com").domain_with_tld.should == "bar.com"
52
+ end
53
+
54
+ end
@@ -0,0 +1,16 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
+ describe "domainatrix" do # Specs for the top-level Domainatrix.parse entry point
4
+ it "should parse into a url object" do
5
+ Domainatrix.parse("http://pauldix.net").should be_a Domainatrix::Url
6
+ end
7
+
8
+ it "should canonicalize" do # end to end: parse a URL string, then check canonical (path and query string are expected to be kept)
9
+ Domainatrix.parse("http://pauldix.net").canonical.should == "net.pauldix"
10
+ Domainatrix.parse("http://pauldix.net/foo.html").canonical.should == "net.pauldix/foo.html"
11
+ Domainatrix.parse("http://pauldix.net/foo.html?asdf=bar").canonical.should == "net.pauldix/foo.html?asdf=bar"
12
+ Domainatrix.parse("http://foo.pauldix.net").canonical.should == "net.pauldix.foo"
13
+ Domainatrix.parse("http://foo.bar.pauldix.net").canonical.should == "net.pauldix.bar.foo"
14
+ Domainatrix.parse("http://pauldix.co.uk").canonical.should == "uk.co.pauldix"
15
+ end
16
+ end
data/spec/spec.opts ADDED
@@ -0,0 +1,2 @@
1
+ --diff
2
+ --color
@@ -0,0 +1,10 @@
1
+ require "rubygems"
2
+ require "rspec"
3
+
4
+ # Optional: run gem install redgreen to get colored test output.
5
+ begin require "redgreen" unless ENV['TM_CURRENT_LINE']; rescue LoadError; end # not loaded when TM_CURRENT_LINE is set; a LoadError from a missing gem is swallowed
6
+
7
+ path = File.expand_path(File.dirname(__FILE__) + "/../lib/") # absolute path of the lib directory one level above this file
8
+ $LOAD_PATH.unshift(path) unless $LOAD_PATH.include?(path) # prepend it to the load path unless it is already listed
9
+
10
+ require "#{File.dirname(__FILE__)}/../lib/domainatrix"
metadata ADDED
@@ -0,0 +1,87 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: f1sherman-domainatrix
3
+ version: !ruby/object:Gem::Version
4
+ hash: 11
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 10
10
+ version: 0.0.10
11
+ platform: ruby
12
+ authors:
13
+ - Paul Dix
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2009-12-10 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: addressable
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 3
29
+ segments:
30
+ - 0
31
+ version: "0"
32
+ type: :runtime
33
+ version_requirements: *id001
34
+ description:
35
+ email: paul@pauldix.net
36
+ executables: []
37
+
38
+ extensions: []
39
+
40
+ extra_rdoc_files: []
41
+
42
+ files:
43
+ - lib/domainatrix.rb
44
+ - lib/effective_tld_names.dat
45
+ - lib/domainatrix/domain_parser.rb
46
+ - lib/domainatrix/url.rb
47
+ - README.textile
48
+ - spec/spec.opts
49
+ - spec/spec_helper.rb
50
+ - spec/domainatrix_spec.rb
51
+ - spec/domainatrix/domain_parser_spec.rb
52
+ - spec/domainatrix/url_spec.rb
53
+ homepage: http://github.com/pauldix/domainatrix
54
+ licenses: []
55
+
56
+ post_install_message:
57
+ rdoc_options: []
58
+
59
+ require_paths:
60
+ - lib
61
+ required_ruby_version: !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ hash: 3
67
+ segments:
68
+ - 0
69
+ version: "0"
70
+ required_rubygems_version: !ruby/object:Gem::Requirement
71
+ none: false
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ hash: 3
76
+ segments:
77
+ - 0
78
+ version: "0"
79
+ requirements: []
80
+
81
+ rubyforge_project:
82
+ rubygems_version: 1.8.10
83
+ signing_key:
84
+ specification_version: 2
85
+ summary: A cruel mistress that uses the public suffix domain list to dominate URLs by canonicalizing, finding the public suffix, and breaking them into their domain parts.
86
+ test_files: []
87
+