domain_prefix 0.2.2.4 → 0.3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +59 -0
- data/LICENSE +1 -1
- data/README.md +1 -1
- data/Rakefile +10 -0
- data/VERSION +1 -1
- data/data/effective_tld_names.dat +883 -107
- data/domain_prefix.gemspec +18 -5
- data/lib/domain_prefix.rb +24 -67
- data/lib/domain_prefix/tree.rb +61 -0
- data/test/helper.rb +2 -0
- data/test/sample/test.txt +27 -7
- data/test/test_domain_prefix.rb +2 -1
- metadata +54 -11
data/domain_prefix.gemspec
CHANGED
@@ -2,14 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
+
# stub: domain_prefix 0.3.0.0 ruby lib
|
5
6
|
|
6
7
|
Gem::Specification.new do |s|
|
7
8
|
s.name = "domain_prefix"
|
8
|
-
s.version = "0.2.2.4"
|
9
|
+
s.version = "0.3.0.0"
|
9
10
|
|
10
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
|
+
s.require_paths = ["lib"]
|
11
13
|
s.authors = ["tadman"]
|
12
|
-
s.date = "
|
14
|
+
s.date = "2014-02-28"
|
13
15
|
s.description = "A library to extract information about top-level domain and registered name from generic and international domain names"
|
14
16
|
s.email = "github@tadman.ca"
|
15
17
|
s.extra_rdoc_files = [
|
@@ -18,6 +20,8 @@ Gem::Specification.new do |s|
|
|
18
20
|
]
|
19
21
|
s.files = [
|
20
22
|
".document",
|
23
|
+
"Gemfile",
|
24
|
+
"Gemfile.lock",
|
21
25
|
"LICENSE",
|
22
26
|
"README.md",
|
23
27
|
"Rakefile",
|
@@ -25,23 +29,32 @@ Gem::Specification.new do |s|
|
|
25
29
|
"data/effective_tld_names.dat",
|
26
30
|
"domain_prefix.gemspec",
|
27
31
|
"lib/domain_prefix.rb",
|
32
|
+
"lib/domain_prefix/tree.rb",
|
28
33
|
"test/helper.rb",
|
29
34
|
"test/sample/README",
|
30
35
|
"test/sample/test.txt",
|
31
36
|
"test/test_domain_prefix.rb"
|
32
37
|
]
|
33
38
|
s.homepage = "http://github.com/twg/domain_prefix"
|
34
|
-
s.
|
35
|
-
s.rubygems_version = "1.8.23"
|
39
|
+
s.rubygems_version = "2.2.0"
|
36
40
|
s.summary = "Domain Prefix Extraction Library"
|
37
41
|
|
38
42
|
if s.respond_to? :specification_version then
|
39
|
-
s.specification_version =
|
43
|
+
s.specification_version = 4
|
40
44
|
|
41
45
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
46
|
+
s.add_runtime_dependency(%q<simpleidn>, [">= 0.0.5"])
|
47
|
+
s.add_development_dependency(%q<turn>, [">= 0.9.0"])
|
48
|
+
s.add_development_dependency(%q<jeweler>, [">= 2.0.0"])
|
42
49
|
else
|
50
|
+
s.add_dependency(%q<simpleidn>, [">= 0.0.5"])
|
51
|
+
s.add_dependency(%q<turn>, [">= 0.9.0"])
|
52
|
+
s.add_dependency(%q<jeweler>, [">= 2.0.0"])
|
43
53
|
end
|
44
54
|
else
|
55
|
+
s.add_dependency(%q<simpleidn>, [">= 0.0.5"])
|
56
|
+
s.add_dependency(%q<turn>, [">= 0.9.0"])
|
57
|
+
s.add_dependency(%q<jeweler>, [">= 2.0.0"])
|
45
58
|
end
|
46
59
|
end
|
47
60
|
|
data/lib/domain_prefix.rb
CHANGED
@@ -1,65 +1,11 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
+
require 'simpleidn'
|
2
3
|
|
3
4
|
module DomainPrefix
|
4
|
-
|
5
|
-
|
6
|
-
class Tree < Hash
|
7
|
-
def insert(path)
|
8
|
-
leaf = path.split(SEPARATOR).reverse.inject(self) do |tree, component|
|
9
|
-
# Seeds an element into the tree structure by referencing it
|
10
|
-
tree[component.sub(/^!/, '')] ||= Tree.new
|
11
|
-
end
|
12
|
-
|
13
|
-
if (path.match(/^[\!]/))
|
14
|
-
leaf[:required] = 0
|
15
|
-
else
|
16
|
-
leaf[:required] = 1
|
17
|
-
end
|
18
|
-
|
19
|
-
self
|
20
|
-
end
|
21
|
-
|
22
|
-
def follow(path)
|
23
|
-
path = path.to_s.split(SEPARATOR) unless (path.is_a?(Array))
|
24
|
-
path = path.reverse
|
25
|
-
|
26
|
-
index = traverse(path)
|
27
|
-
|
28
|
-
index and index <= path.length and path[0, index].reverse
|
29
|
-
end
|
30
|
-
|
31
|
-
protected
|
32
|
-
def traverse(path, index = 0)
|
33
|
-
component = path[index]
|
34
|
-
|
35
|
-
unless (component)
|
36
|
-
return self[:required] == 0 ? index : nil
|
37
|
-
end
|
38
|
-
|
39
|
-
named_branch = self[component]
|
5
|
+
require 'domain_prefix/tree'
|
40
6
|
|
41
|
-
|
42
|
-
result = named_branch.traverse(path, index + 1)
|
43
|
-
|
44
|
-
return result if (result)
|
45
|
-
end
|
46
|
-
|
47
|
-
wildcard_branch = self["*"]
|
48
|
-
|
49
|
-
if (wildcard_branch)
|
50
|
-
result = wildcard_branch.traverse(path, index + 1)
|
51
|
-
|
52
|
-
return result if (result)
|
53
|
-
end
|
54
|
-
|
55
|
-
if (!named_branch and !wildcard_branch and self[:required])
|
56
|
-
return index + self[:required]
|
57
|
-
end
|
7
|
+
SEPARATOR = '.'.freeze
|
58
8
|
|
59
|
-
return
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
63
9
|
TLDIFIER_SOURCE_FILE = File.expand_path(File.join('..', 'data', 'effective_tld_names.dat'), File.dirname(__FILE__))
|
64
10
|
|
65
11
|
TLD_SET = begin
|
@@ -82,19 +28,24 @@ module DomainPrefix
|
|
82
28
|
'local' => true
|
83
29
|
}.freeze
|
84
30
|
|
31
|
+
# Returns a cleaned up, canonical version of a domain name.
|
85
32
|
def rfc3492_canonical_domain(domain)
|
86
|
-
# FIX: Full implementation of
|
33
|
+
# FIX: Full implementation of RFC3492 required.
|
34
|
+
# http://www.ietf.org/rfc/rfc3492.txt
|
87
35
|
domain and domain.downcase
|
88
36
|
end
|
89
37
|
|
38
|
+
# Returns true if the given tld is listed as public, false otherwise.
|
90
39
|
def public_tld?(tld)
|
91
40
|
!NONPUBLIC_TLD.key?(tld)
|
92
41
|
end
|
93
42
|
|
94
|
-
|
95
|
-
|
43
|
+
# Returns the registered domain name for a given FQDN or nil if one cannot
|
44
|
+
# be determined.
|
45
|
+
def registered_domain(fqdn, rules = :strict)
|
46
|
+
return unless (fqdn)
|
96
47
|
|
97
|
-
components = rfc3492_canonical_domain(
|
48
|
+
components = rfc3492_canonical_domain(fqdn).split(SEPARATOR)
|
98
49
|
|
99
50
|
return if (components.empty? or components.find(&:empty?))
|
100
51
|
|
@@ -115,6 +66,8 @@ module DomainPrefix
|
|
115
66
|
suffix.join(SEPARATOR)
|
116
67
|
end
|
117
68
|
|
69
|
+
# Returns the public suffix (e.g. "co.uk") for a given domain or nil if one
|
70
|
+
# cannot be determined.
|
118
71
|
def public_suffix(domain)
|
119
72
|
return unless (domain)
|
120
73
|
|
@@ -133,15 +86,19 @@ module DomainPrefix
|
|
133
86
|
suffix.join(SEPARATOR)
|
134
87
|
end
|
135
88
|
|
136
|
-
|
137
|
-
|
89
|
+
# Returns the very top-level domain for a given domain, or nil if one cannot
|
90
|
+
# be determined.
|
91
|
+
def tld(fqdn)
|
92
|
+
suffix = public_suffix(rfc3492_canonical_domain(fqdn))
|
138
93
|
|
139
94
|
suffix and suffix.split(SEPARATOR).last
|
140
95
|
end
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
96
|
+
|
97
|
+
# Returns the name component of a given domain or nil if one cannot be
|
98
|
+
# determined.
|
99
|
+
def name(fqdn)
|
100
|
+
if (fqdn = registered_domain(fqdn))
|
101
|
+
fqdn.split(SEPARATOR).first
|
145
102
|
else
|
146
103
|
nil
|
147
104
|
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
class DomainPrefix::Tree < Hash
|
2
|
+
def insert(path)
|
3
|
+
components = path.sub(/^!/, '').split(DomainPrefix::SEPARATOR).reverse
|
4
|
+
|
5
|
+
leaves = components.inject([ self ]) do |trees, part|
|
6
|
+
[ part, SimpleIDN.to_unicode(part), SimpleIDN.to_ascii(part) ].uniq.flat_map do |l|
|
7
|
+
trees.collect do |tree|
|
8
|
+
tree[l] ||= self.class.new
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
required = path.match(/^[\!]/) ? 0 : 1
|
14
|
+
|
15
|
+
leaves.each do |leaf|
|
16
|
+
leaf[:required] = required
|
17
|
+
end
|
18
|
+
|
19
|
+
self
|
20
|
+
end
|
21
|
+
|
22
|
+
def follow(path)
|
23
|
+
path = path.to_s.split(SEPARATOR) unless (path.is_a?(Array))
|
24
|
+
path = path.reverse
|
25
|
+
|
26
|
+
index = traverse(path)
|
27
|
+
|
28
|
+
index and index <= path.length and path[0, index].reverse
|
29
|
+
end
|
30
|
+
|
31
|
+
protected
|
32
|
+
def traverse(path, index = 0)
|
33
|
+
component = path[index]
|
34
|
+
|
35
|
+
unless (component)
|
36
|
+
return self[:required] == 0 ? index : nil
|
37
|
+
end
|
38
|
+
|
39
|
+
named_branch = self[component]
|
40
|
+
|
41
|
+
if (named_branch)
|
42
|
+
result = named_branch.traverse(path, index + 1)
|
43
|
+
|
44
|
+
return result if (result)
|
45
|
+
end
|
46
|
+
|
47
|
+
wildcard_branch = self["*"]
|
48
|
+
|
49
|
+
if (wildcard_branch)
|
50
|
+
result = wildcard_branch.traverse(path, index + 1)
|
51
|
+
|
52
|
+
return result if (result)
|
53
|
+
end
|
54
|
+
|
55
|
+
if (!named_branch and !wildcard_branch and self[:required])
|
56
|
+
return index + self[:required]
|
57
|
+
end
|
58
|
+
|
59
|
+
return
|
60
|
+
end
|
61
|
+
end
|
data/test/helper.rb
CHANGED
data/test/sample/test.txt
CHANGED
@@ -60,12 +60,12 @@ checkPublicSuffix('a.b.c.kobe.jp', 'b.c.kobe.jp');
|
|
60
60
|
checkPublicSuffix('city.kobe.jp', 'city.kobe.jp');
|
61
61
|
checkPublicSuffix('www.city.kobe.jp', 'city.kobe.jp');
|
62
62
|
// TLD with a wildcard rule and exceptions.
|
63
|
-
checkPublicSuffix('
|
64
|
-
checkPublicSuffix('test.
|
65
|
-
checkPublicSuffix('b.test.
|
66
|
-
checkPublicSuffix('a.b.test.
|
67
|
-
checkPublicSuffix('
|
68
|
-
checkPublicSuffix('www.
|
63
|
+
checkPublicSuffix('ck', null);
|
64
|
+
checkPublicSuffix('test.ck', null);
|
65
|
+
checkPublicSuffix('b.test.ck', 'b.test.ck');
|
66
|
+
checkPublicSuffix('a.b.test.ck', 'b.test.ck');
|
67
|
+
checkPublicSuffix('www.ck', 'www.ck');
|
68
|
+
checkPublicSuffix('www.www.ck', 'www.ck');
|
69
69
|
// US K12.
|
70
70
|
checkPublicSuffix('us', null);
|
71
71
|
checkPublicSuffix('test.us', 'test.us');
|
@@ -75,4 +75,24 @@ checkPublicSuffix('test.ak.us', 'test.ak.us');
|
|
75
75
|
checkPublicSuffix('www.test.ak.us', 'test.ak.us');
|
76
76
|
checkPublicSuffix('k12.ak.us', null);
|
77
77
|
checkPublicSuffix('test.k12.ak.us', 'test.k12.ak.us');
|
78
|
-
checkPublicSuffix('www.test.k12.ak.us', 'test.k12.ak.us');
|
78
|
+
checkPublicSuffix('www.test.k12.ak.us', 'test.k12.ak.us');
|
79
|
+
// IDN labels.
|
80
|
+
checkPublicSuffix('食狮.com.cn', '食狮.com.cn');
|
81
|
+
checkPublicSuffix('食狮.公司.cn', '食狮.公司.cn');
|
82
|
+
checkPublicSuffix('www.食狮.公司.cn', '食狮.公司.cn');
|
83
|
+
checkPublicSuffix('shishi.公司.cn', 'shishi.公司.cn');
|
84
|
+
checkPublicSuffix('公司.cn', null);
|
85
|
+
checkPublicSuffix('食狮.中国', '食狮.中国');
|
86
|
+
checkPublicSuffix('www.食狮.中国', '食狮.中国');
|
87
|
+
checkPublicSuffix('shishi.中国', 'shishi.中国');
|
88
|
+
checkPublicSuffix('中国', null);
|
89
|
+
// Same as above, but punycoded.
|
90
|
+
checkPublicSuffix('xn--85x722f.com.cn', 'xn--85x722f.com.cn');
|
91
|
+
checkPublicSuffix('xn--85x722f.xn--55qx5d.cn', 'xn--85x722f.xn--55qx5d.cn');
|
92
|
+
checkPublicSuffix('www.xn--85x722f.xn--55qx5d.cn', 'xn--85x722f.xn--55qx5d.cn');
|
93
|
+
checkPublicSuffix('shishi.xn--55qx5d.cn', 'shishi.xn--55qx5d.cn');
|
94
|
+
checkPublicSuffix('xn--55qx5d.cn', null);
|
95
|
+
checkPublicSuffix('xn--85x722f.xn--fiqs8s', 'xn--85x722f.xn--fiqs8s');
|
96
|
+
checkPublicSuffix('www.xn--85x722f.xn--fiqs8s', 'xn--85x722f.xn--fiqs8s');
|
97
|
+
checkPublicSuffix('shishi.xn--fiqs8s', 'shishi.xn--fiqs8s');
|
98
|
+
checkPublicSuffix('xn--fiqs8s', null);
|
data/test/test_domain_prefix.rb
CHANGED
@@ -2,6 +2,7 @@ require_relative 'helper'
|
|
2
2
|
|
3
3
|
class TestDomainPrefix < Test::Unit::TestCase
|
4
4
|
def test_initialization
|
5
|
+
assert DomainPrefix::TLD_SET.length > 0
|
5
6
|
end
|
6
7
|
|
7
8
|
def test_examples
|
@@ -15,7 +16,7 @@ class TestDomainPrefix < Test::Unit::TestCase
|
|
15
16
|
'example.gc.ca' => %w[ example.gc.ca gc.ca ],
|
16
17
|
'example.co.uk' => %w[ example.co.uk co.uk ],
|
17
18
|
'example.com.au' => %w[ example.com.au com.au ],
|
18
|
-
'example.au' => [
|
19
|
+
'example.au' => %w[ example.au au ],
|
19
20
|
'example.bar.jp' => %w[ bar.jp jp ],
|
20
21
|
'example.bar.hokkaido.jp' =>%w[ bar.hokkaido.jp hokkaido.jp ],
|
21
22
|
'example.metro.tokyo.jp' => %w[ metro.tokyo.jp tokyo.jp ]
|
metadata
CHANGED
@@ -1,16 +1,57 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: domain_prefix
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2.4
|
5
|
-
prerelease:
|
4
|
+
version: 0.3.0.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- tadman
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
13
|
-
dependencies:
|
11
|
+
date: 2014-02-28 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: simpleidn
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.0.5
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.0.5
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: turn
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.9.0
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.9.0
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: jeweler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 2.0.0
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 2.0.0
|
14
55
|
description: A library to extract information about top-level domain and registered
|
15
56
|
name from generic and international domain names
|
16
57
|
email: github@tadman.ca
|
@@ -20,7 +61,9 @@ extra_rdoc_files:
|
|
20
61
|
- LICENSE
|
21
62
|
- README.md
|
22
63
|
files:
|
23
|
-
- .document
|
64
|
+
- ".document"
|
65
|
+
- Gemfile
|
66
|
+
- Gemfile.lock
|
24
67
|
- LICENSE
|
25
68
|
- README.md
|
26
69
|
- Rakefile
|
@@ -28,32 +71,32 @@ files:
|
|
28
71
|
- data/effective_tld_names.dat
|
29
72
|
- domain_prefix.gemspec
|
30
73
|
- lib/domain_prefix.rb
|
74
|
+
- lib/domain_prefix/tree.rb
|
31
75
|
- test/helper.rb
|
32
76
|
- test/sample/README
|
33
77
|
- test/sample/test.txt
|
34
78
|
- test/test_domain_prefix.rb
|
35
79
|
homepage: http://github.com/twg/domain_prefix
|
36
80
|
licenses: []
|
81
|
+
metadata: {}
|
37
82
|
post_install_message:
|
38
83
|
rdoc_options: []
|
39
84
|
require_paths:
|
40
85
|
- lib
|
41
86
|
required_ruby_version: !ruby/object:Gem::Requirement
|
42
|
-
none: false
|
43
87
|
requirements:
|
44
|
-
- -
|
88
|
+
- - ">="
|
45
89
|
- !ruby/object:Gem::Version
|
46
90
|
version: '0'
|
47
91
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
48
|
-
none: false
|
49
92
|
requirements:
|
50
|
-
- -
|
93
|
+
- - ">="
|
51
94
|
- !ruby/object:Gem::Version
|
52
95
|
version: '0'
|
53
96
|
requirements: []
|
54
97
|
rubyforge_project:
|
55
|
-
rubygems_version:
|
98
|
+
rubygems_version: 2.2.0
|
56
99
|
signing_key:
|
57
|
-
specification_version:
|
100
|
+
specification_version: 4
|
58
101
|
summary: Domain Prefix Extraction Library
|
59
102
|
test_files: []
|