domain_prefix 0.2.2.4 → 0.3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +59 -0
- data/LICENSE +1 -1
- data/README.md +1 -1
- data/Rakefile +10 -0
- data/VERSION +1 -1
- data/data/effective_tld_names.dat +883 -107
- data/domain_prefix.gemspec +18 -5
- data/lib/domain_prefix.rb +24 -67
- data/lib/domain_prefix/tree.rb +61 -0
- data/test/helper.rb +2 -0
- data/test/sample/test.txt +27 -7
- data/test/test_domain_prefix.rb +2 -1
- metadata +54 -11
data/domain_prefix.gemspec
CHANGED
@@ -2,14 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
+
# stub: domain_prefix 0.3.0.0 ruby lib
|
5
6
|
|
6
7
|
Gem::Specification.new do |s|
|
7
8
|
s.name = "domain_prefix"
|
8
|
-
s.version = "0.2.2.4"
|
9
|
+
s.version = "0.3.0.0"
|
9
10
|
|
10
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
|
+
s.require_paths = ["lib"]
|
11
13
|
s.authors = ["tadman"]
|
12
|
-
s.date = "
|
14
|
+
s.date = "2014-02-28"
|
13
15
|
s.description = "A library to extract information about top-level domain and registered name from generic and international domain names"
|
14
16
|
s.email = "github@tadman.ca"
|
15
17
|
s.extra_rdoc_files = [
|
@@ -18,6 +20,8 @@ Gem::Specification.new do |s|
|
|
18
20
|
]
|
19
21
|
s.files = [
|
20
22
|
".document",
|
23
|
+
"Gemfile",
|
24
|
+
"Gemfile.lock",
|
21
25
|
"LICENSE",
|
22
26
|
"README.md",
|
23
27
|
"Rakefile",
|
@@ -25,23 +29,32 @@ Gem::Specification.new do |s|
|
|
25
29
|
"data/effective_tld_names.dat",
|
26
30
|
"domain_prefix.gemspec",
|
27
31
|
"lib/domain_prefix.rb",
|
32
|
+
"lib/domain_prefix/tree.rb",
|
28
33
|
"test/helper.rb",
|
29
34
|
"test/sample/README",
|
30
35
|
"test/sample/test.txt",
|
31
36
|
"test/test_domain_prefix.rb"
|
32
37
|
]
|
33
38
|
s.homepage = "http://github.com/twg/domain_prefix"
|
34
|
-
s.
|
35
|
-
s.rubygems_version = "1.8.23"
|
39
|
+
s.rubygems_version = "2.2.0"
|
36
40
|
s.summary = "Domain Prefix Extraction Library"
|
37
41
|
|
38
42
|
if s.respond_to? :specification_version then
|
39
|
-
s.specification_version =
|
43
|
+
s.specification_version = 4
|
40
44
|
|
41
45
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
46
|
+
s.add_runtime_dependency(%q<simpleidn>, [">= 0.0.5"])
|
47
|
+
s.add_development_dependency(%q<turn>, [">= 0.9.0"])
|
48
|
+
s.add_development_dependency(%q<jeweler>, [">= 2.0.0"])
|
42
49
|
else
|
50
|
+
s.add_dependency(%q<simpleidn>, [">= 0.0.5"])
|
51
|
+
s.add_dependency(%q<turn>, [">= 0.9.0"])
|
52
|
+
s.add_dependency(%q<jeweler>, [">= 2.0.0"])
|
43
53
|
end
|
44
54
|
else
|
55
|
+
s.add_dependency(%q<simpleidn>, [">= 0.0.5"])
|
56
|
+
s.add_dependency(%q<turn>, [">= 0.9.0"])
|
57
|
+
s.add_dependency(%q<jeweler>, [">= 2.0.0"])
|
45
58
|
end
|
46
59
|
end
|
47
60
|
|
data/lib/domain_prefix.rb
CHANGED
@@ -1,65 +1,11 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
+
require 'simpleidn'
|
2
3
|
|
3
4
|
module DomainPrefix
|
4
|
-
|
5
|
-
|
6
|
-
class Tree < Hash
|
7
|
-
def insert(path)
|
8
|
-
leaf = path.split(SEPARATOR).reverse.inject(self) do |tree, component|
|
9
|
-
# Seeds an element into the tree structure by referencing it
|
10
|
-
tree[component.sub(/^!/, '')] ||= Tree.new
|
11
|
-
end
|
12
|
-
|
13
|
-
if (path.match(/^[\!]/))
|
14
|
-
leaf[:required] = 0
|
15
|
-
else
|
16
|
-
leaf[:required] = 1
|
17
|
-
end
|
18
|
-
|
19
|
-
self
|
20
|
-
end
|
21
|
-
|
22
|
-
def follow(path)
|
23
|
-
path = path.to_s.split(SEPARATOR) unless (path.is_a?(Array))
|
24
|
-
path = path.reverse
|
25
|
-
|
26
|
-
index = traverse(path)
|
27
|
-
|
28
|
-
index and index <= path.length and path[0, index].reverse
|
29
|
-
end
|
30
|
-
|
31
|
-
protected
|
32
|
-
def traverse(path, index = 0)
|
33
|
-
component = path[index]
|
34
|
-
|
35
|
-
unless (component)
|
36
|
-
return self[:required] == 0 ? index : nil
|
37
|
-
end
|
38
|
-
|
39
|
-
named_branch = self[component]
|
5
|
+
require 'domain_prefix/tree'
|
40
6
|
|
41
|
-
|
42
|
-
result = named_branch.traverse(path, index + 1)
|
43
|
-
|
44
|
-
return result if (result)
|
45
|
-
end
|
46
|
-
|
47
|
-
wildcard_branch = self["*"]
|
48
|
-
|
49
|
-
if (wildcard_branch)
|
50
|
-
result = wildcard_branch.traverse(path, index + 1)
|
51
|
-
|
52
|
-
return result if (result)
|
53
|
-
end
|
54
|
-
|
55
|
-
if (!named_branch and !wildcard_branch and self[:required])
|
56
|
-
return index + self[:required]
|
57
|
-
end
|
7
|
+
SEPARATOR = '.'.freeze
|
58
8
|
|
59
|
-
return
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
63
9
|
TLDIFIER_SOURCE_FILE = File.expand_path(File.join('..', 'data', 'effective_tld_names.dat'), File.dirname(__FILE__))
|
64
10
|
|
65
11
|
TLD_SET = begin
|
@@ -82,19 +28,24 @@ module DomainPrefix
|
|
82
28
|
'local' => true
|
83
29
|
}.freeze
|
84
30
|
|
31
|
+
# Returns a cleaned up, canonical version of a domain name.
|
85
32
|
def rfc3492_canonical_domain(domain)
|
86
|
-
# FIX: Full implementation of
|
33
|
+
# FIX: Full implementation of RFC3492 required.
|
34
|
+
# http://www.ietf.org/rfc/rfc3492.txt
|
87
35
|
domain and domain.downcase
|
88
36
|
end
|
89
37
|
|
38
|
+
# Returns true if the given tld is listed as public, false otherwise.
|
90
39
|
def public_tld?(tld)
|
91
40
|
!NONPUBLIC_TLD.key?(tld)
|
92
41
|
end
|
93
42
|
|
94
|
-
|
95
|
-
|
43
|
+
# Returns the registered domain name for a given FQDN or nil if one cannot
|
44
|
+
# be determined.
|
45
|
+
def registered_domain(fqdn, rules = :strict)
|
46
|
+
return unless (fqdn)
|
96
47
|
|
97
|
-
components = rfc3492_canonical_domain(
|
48
|
+
components = rfc3492_canonical_domain(fqdn).split(SEPARATOR)
|
98
49
|
|
99
50
|
return if (components.empty? or components.find(&:empty?))
|
100
51
|
|
@@ -115,6 +66,8 @@ module DomainPrefix
|
|
115
66
|
suffix.join(SEPARATOR)
|
116
67
|
end
|
117
68
|
|
69
|
+
# Returns the public suffix (e.g. "co.uk") for a given domain or nil if one
|
70
|
+
# cannot be determined.
|
118
71
|
def public_suffix(domain)
|
119
72
|
return unless (domain)
|
120
73
|
|
@@ -133,15 +86,19 @@ module DomainPrefix
|
|
133
86
|
suffix.join(SEPARATOR)
|
134
87
|
end
|
135
88
|
|
136
|
-
|
137
|
-
|
89
|
+
# Returns the very top-level domain for a given domain, or nil if one cannot
|
90
|
+
# be determined.
|
91
|
+
def tld(fqdn)
|
92
|
+
suffix = public_suffix(rfc3492_canonical_domain(fqdn))
|
138
93
|
|
139
94
|
suffix and suffix.split(SEPARATOR).last
|
140
95
|
end
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
96
|
+
|
97
|
+
# Returns the name component of a given domain or nil if one cannot be
|
98
|
+
# determined.
|
99
|
+
def name(fqdn)
|
100
|
+
if (fqdn = registered_domain(fqdn))
|
101
|
+
fqdn.split(SEPARATOR).first
|
145
102
|
else
|
146
103
|
nil
|
147
104
|
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
class DomainPrefix::Tree < Hash
|
2
|
+
def insert(path)
|
3
|
+
components = path.sub(/^!/, '').split(DomainPrefix::SEPARATOR).reverse
|
4
|
+
|
5
|
+
leaves = components.inject([ self ]) do |trees, part|
|
6
|
+
[ part, SimpleIDN.to_unicode(part), SimpleIDN.to_ascii(part) ].uniq.flat_map do |l|
|
7
|
+
trees.collect do |tree|
|
8
|
+
tree[l] ||= self.class.new
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
required = path.match(/^[\!]/) ? 0 : 1
|
14
|
+
|
15
|
+
leaves.each do |leaf|
|
16
|
+
leaf[:required] = required
|
17
|
+
end
|
18
|
+
|
19
|
+
self
|
20
|
+
end
|
21
|
+
|
22
|
+
def follow(path)
|
23
|
+
path = path.to_s.split(SEPARATOR) unless (path.is_a?(Array))
|
24
|
+
path = path.reverse
|
25
|
+
|
26
|
+
index = traverse(path)
|
27
|
+
|
28
|
+
index and index <= path.length and path[0, index].reverse
|
29
|
+
end
|
30
|
+
|
31
|
+
protected
|
32
|
+
def traverse(path, index = 0)
|
33
|
+
component = path[index]
|
34
|
+
|
35
|
+
unless (component)
|
36
|
+
return self[:required] == 0 ? index : nil
|
37
|
+
end
|
38
|
+
|
39
|
+
named_branch = self[component]
|
40
|
+
|
41
|
+
if (named_branch)
|
42
|
+
result = named_branch.traverse(path, index + 1)
|
43
|
+
|
44
|
+
return result if (result)
|
45
|
+
end
|
46
|
+
|
47
|
+
wildcard_branch = self["*"]
|
48
|
+
|
49
|
+
if (wildcard_branch)
|
50
|
+
result = wildcard_branch.traverse(path, index + 1)
|
51
|
+
|
52
|
+
return result if (result)
|
53
|
+
end
|
54
|
+
|
55
|
+
if (!named_branch and !wildcard_branch and self[:required])
|
56
|
+
return index + self[:required]
|
57
|
+
end
|
58
|
+
|
59
|
+
return
|
60
|
+
end
|
61
|
+
end
|
data/test/helper.rb
CHANGED
data/test/sample/test.txt
CHANGED
@@ -60,12 +60,12 @@ checkPublicSuffix('a.b.c.kobe.jp', 'b.c.kobe.jp');
|
|
60
60
|
checkPublicSuffix('city.kobe.jp', 'city.kobe.jp');
|
61
61
|
checkPublicSuffix('www.city.kobe.jp', 'city.kobe.jp');
|
62
62
|
// TLD with a wildcard rule and exceptions.
|
63
|
-
checkPublicSuffix('
|
64
|
-
checkPublicSuffix('test.
|
65
|
-
checkPublicSuffix('b.test.
|
66
|
-
checkPublicSuffix('a.b.test.
|
67
|
-
checkPublicSuffix('
|
68
|
-
checkPublicSuffix('www.
|
63
|
+
checkPublicSuffix('ck', null);
|
64
|
+
checkPublicSuffix('test.ck', null);
|
65
|
+
checkPublicSuffix('b.test.ck', 'b.test.ck');
|
66
|
+
checkPublicSuffix('a.b.test.ck', 'b.test.ck');
|
67
|
+
checkPublicSuffix('www.ck', 'www.ck');
|
68
|
+
checkPublicSuffix('www.www.ck', 'www.ck');
|
69
69
|
// US K12.
|
70
70
|
checkPublicSuffix('us', null);
|
71
71
|
checkPublicSuffix('test.us', 'test.us');
|
@@ -75,4 +75,24 @@ checkPublicSuffix('test.ak.us', 'test.ak.us');
|
|
75
75
|
checkPublicSuffix('www.test.ak.us', 'test.ak.us');
|
76
76
|
checkPublicSuffix('k12.ak.us', null);
|
77
77
|
checkPublicSuffix('test.k12.ak.us', 'test.k12.ak.us');
|
78
|
-
checkPublicSuffix('www.test.k12.ak.us', 'test.k12.ak.us');
|
78
|
+
checkPublicSuffix('www.test.k12.ak.us', 'test.k12.ak.us');
|
79
|
+
// IDN labels.
|
80
|
+
checkPublicSuffix('食狮.com.cn', '食狮.com.cn');
|
81
|
+
checkPublicSuffix('食狮.公司.cn', '食狮.公司.cn');
|
82
|
+
checkPublicSuffix('www.食狮.公司.cn', '食狮.公司.cn');
|
83
|
+
checkPublicSuffix('shishi.公司.cn', 'shishi.公司.cn');
|
84
|
+
checkPublicSuffix('公司.cn', null);
|
85
|
+
checkPublicSuffix('食狮.中国', '食狮.中国');
|
86
|
+
checkPublicSuffix('www.食狮.中国', '食狮.中国');
|
87
|
+
checkPublicSuffix('shishi.中国', 'shishi.中国');
|
88
|
+
checkPublicSuffix('中国', null);
|
89
|
+
// Same as above, but punycoded.
|
90
|
+
checkPublicSuffix('xn--85x722f.com.cn', 'xn--85x722f.com.cn');
|
91
|
+
checkPublicSuffix('xn--85x722f.xn--55qx5d.cn', 'xn--85x722f.xn--55qx5d.cn');
|
92
|
+
checkPublicSuffix('www.xn--85x722f.xn--55qx5d.cn', 'xn--85x722f.xn--55qx5d.cn');
|
93
|
+
checkPublicSuffix('shishi.xn--55qx5d.cn', 'shishi.xn--55qx5d.cn');
|
94
|
+
checkPublicSuffix('xn--55qx5d.cn', null);
|
95
|
+
checkPublicSuffix('xn--85x722f.xn--fiqs8s', 'xn--85x722f.xn--fiqs8s');
|
96
|
+
checkPublicSuffix('www.xn--85x722f.xn--fiqs8s', 'xn--85x722f.xn--fiqs8s');
|
97
|
+
checkPublicSuffix('shishi.xn--fiqs8s', 'shishi.xn--fiqs8s');
|
98
|
+
checkPublicSuffix('xn--fiqs8s', null);
|
data/test/test_domain_prefix.rb
CHANGED
@@ -2,6 +2,7 @@ require_relative 'helper'
|
|
2
2
|
|
3
3
|
class TestDomainPrefix < Test::Unit::TestCase
|
4
4
|
def test_initialization
|
5
|
+
assert DomainPrefix::TLD_SET.length > 0
|
5
6
|
end
|
6
7
|
|
7
8
|
def test_examples
|
@@ -15,7 +16,7 @@ class TestDomainPrefix < Test::Unit::TestCase
|
|
15
16
|
'example.gc.ca' => %w[ example.gc.ca gc.ca ],
|
16
17
|
'example.co.uk' => %w[ example.co.uk co.uk ],
|
17
18
|
'example.com.au' => %w[ example.com.au com.au ],
|
18
|
-
'example.au' => [
|
19
|
+
'example.au' => %w[ example.au au ],
|
19
20
|
'example.bar.jp' => %w[ bar.jp jp ],
|
20
21
|
'example.bar.hokkaido.jp' =>%w[ bar.hokkaido.jp hokkaido.jp ],
|
21
22
|
'example.metro.tokyo.jp' => %w[ metro.tokyo.jp tokyo.jp ]
|
metadata
CHANGED
@@ -1,16 +1,57 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: domain_prefix
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2.4
|
5
|
-
prerelease:
|
4
|
+
version: 0.3.0.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- tadman
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
13
|
-
dependencies:
|
11
|
+
date: 2014-02-28 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: simpleidn
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.0.5
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.0.5
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: turn
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.9.0
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.9.0
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: jeweler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 2.0.0
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 2.0.0
|
14
55
|
description: A library to extract information about top-level domain and registered
|
15
56
|
name from generic and international domain names
|
16
57
|
email: github@tadman.ca
|
@@ -20,7 +61,9 @@ extra_rdoc_files:
|
|
20
61
|
- LICENSE
|
21
62
|
- README.md
|
22
63
|
files:
|
23
|
-
- .document
|
64
|
+
- ".document"
|
65
|
+
- Gemfile
|
66
|
+
- Gemfile.lock
|
24
67
|
- LICENSE
|
25
68
|
- README.md
|
26
69
|
- Rakefile
|
@@ -28,32 +71,32 @@ files:
|
|
28
71
|
- data/effective_tld_names.dat
|
29
72
|
- domain_prefix.gemspec
|
30
73
|
- lib/domain_prefix.rb
|
74
|
+
- lib/domain_prefix/tree.rb
|
31
75
|
- test/helper.rb
|
32
76
|
- test/sample/README
|
33
77
|
- test/sample/test.txt
|
34
78
|
- test/test_domain_prefix.rb
|
35
79
|
homepage: http://github.com/twg/domain_prefix
|
36
80
|
licenses: []
|
81
|
+
metadata: {}
|
37
82
|
post_install_message:
|
38
83
|
rdoc_options: []
|
39
84
|
require_paths:
|
40
85
|
- lib
|
41
86
|
required_ruby_version: !ruby/object:Gem::Requirement
|
42
|
-
none: false
|
43
87
|
requirements:
|
44
|
-
- -
|
88
|
+
- - ">="
|
45
89
|
- !ruby/object:Gem::Version
|
46
90
|
version: '0'
|
47
91
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
48
|
-
none: false
|
49
92
|
requirements:
|
50
|
-
- -
|
93
|
+
- - ">="
|
51
94
|
- !ruby/object:Gem::Version
|
52
95
|
version: '0'
|
53
96
|
requirements: []
|
54
97
|
rubyforge_project:
|
55
|
-
rubygems_version:
|
98
|
+
rubygems_version: 2.2.0
|
56
99
|
signing_key:
|
57
|
-
specification_version:
|
100
|
+
specification_version: 4
|
58
101
|
summary: Domain Prefix Extraction Library
|
59
102
|
test_files: []
|