domain_prefix 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/LICENSE +20 -0
- data/README.rdoc +18 -0
- data/Rakefile +30 -0
- data/VERSION +1 -0
- data/data/effective_tld_names.dat +5197 -0
- data/domain_prefix.gemspec +47 -0
- data/lib/domain_prefix.rb +112 -0
- data/test/helper.rb +35 -0
- data/test/sample/README +1 -0
- data/test/sample/test.txt +76 -0
- data/test/test_tldifier.rb +49 -0
- metadata +59 -0
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# Generated by jeweler
|
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
|
4
|
+
# -*- encoding: utf-8 -*-
|
|
5
|
+
|
|
6
|
+
# Package description for the domain_prefix gem: identity, authorship,
# packaged files and the RubyGems compatibility markers.
Gem::Specification.new do |spec|
  spec.name = "domain_prefix"
  spec.version = "0.1.0"

  # Older RubyGems releases lack this setter, hence the capability check.
  if spec.respond_to?(:required_rubygems_version=)
    spec.required_rubygems_version = Gem::Requirement.new(">= 0")
  end

  spec.authors = ["tadman"]
  spec.date = "2012-02-03"
  spec.description = "A library to extract information about top-level domain and registered name from generic and international domain names"
  spec.email = "github@tadman.ca"
  spec.extra_rdoc_files = %w[
    LICENSE
    README.rdoc
  ]
  spec.files = %w[
    .document
    LICENSE
    README.rdoc
    Rakefile
    VERSION
    data/effective_tld_names.dat
    domain_prefix.gemspec
    lib/domain_prefix.rb
    test/helper.rb
    test/sample/README
    test/sample/test.txt
    test/test_tldifier.rb
  ]
  spec.homepage = "http://github.com/twg/domain_prefix"
  spec.require_paths = ["lib"]
  spec.rubygems_version = "1.8.11"
  spec.summary = "Domain Prefix Extraction Library"

  # The gem declares no dependencies, so the only thing left to do for
  # RubyGems versions that understand it is to pin the specification version.
  spec.specification_version = 3 if spec.respond_to?(:specification_version)
end
|
|
47
|
+
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# encoding: UTF-8
|
|
2
|
+
|
|
3
|
+
module DomainPrefix
|
|
4
|
+
# Auto-vivifying nested Hash: reading a missing key with #[] creates an empty
# child TreeHash, stores it under that key and returns it.
class TreeHash < Hash
  def initialize
    super { |hash, key| hash[key] = TreeHash.new }
  end

  # Walks the tree one dot-separated label at a time, in the order the labels
  # appear in +domain+, and returns the node that is reached.
  #
  # Returns nil when +domain+ is nil or when any label is missing. An empty
  # string contains no labels, so the receiver itself is returned.
  #
  # Presence is tested with key? instead of #[] so that a lookup never
  # auto-vivifies nodes (which would also fail on a frozen tree).
  #
  # NOTE(review): labels are consumed left to right; a tree whose first level
  # is keyed by the right-most label (as a TLD-first index would be) needs
  # the caller to pass the labels in that order -- confirm intended usage.
  def find_domain(domain)
    return nil if domain.nil?

    domain.split('.').inject(self) do |node, label|
      node && (node.key?(label) ? node[label] : nil)
    end
  end
end
|
|
17
|
+
|
|
18
|
+
# Absolute path of the bundled suffix list, resolved relative to this source
# file (../data/effective_tld_names.dat).
TLDIFIER_SOURCE_FILE = File.expand_path(File.join('..', 'data', 'effective_tld_names.dat'), File.dirname(__FILE__))

# Every rule from the list in file order: "//" comments and trailing
# whitespace are stripped from each line and lines left empty are dropped.
#
# The file is read explicitly as UTF-8 so that loading does not depend on the
# process locale; with a non-UTF-8 default external encoding any non-ASCII
# bytes in the list would be mis-tagged and the string operations below
# could raise on them.
TLD_SET = File.read(TLDIFIER_SOURCE_FILE, :encoding => 'UTF-8').split(/\n/).collect do |line|
  line.sub(%r[//.*], '').sub(/\s+$/, '')
end.reject(&:empty?).freeze

# The same rules ordered longest first, ties broken alphabetically.
TLD_NAMES = TLD_SET.sort_by do |d|
  [ -d.length, d ]
end.freeze

# Rules indexed as a tree keyed by label from right to left (TLD first).
# A '*' label adds no level of its own and a leading '!' is stripped from
# the label it prefixes. Only the root node is frozen.
TLD_TREE = TLD_NAMES.inject(TreeHash.new) do |h, name|
  name.split('.').reverse.inject(h) do |_h, component|
    case (component)
    when '*'
      _h
    when /!(.*)/
      _h[$1]
    else
      _h[component]
    end
  end

  h
end.freeze

# Matches a string that consists of exactly one rule; a leading "*." on a
# wildcard rule is removed, so the wildcard's parent suffix matches too.
PREFIX_SPEC = Regexp.new(
  '^(' + TLD_NAMES.collect do |d|
    Regexp.escape(d).sub(/^\\\*\\\./, '')
  end.join('|') + ')$'
).freeze

# Rules that start with '!', keyed by the rule text without the '!'.
# registered_domain and public_suffix consult this to let such names through
# even though PREFIX_SPEC matches them.
ALLOWED_DOMAIN_PREFIXES = Hash[
  TLD_NAMES.select do |d|
    d.match(/^\!/)
  end.collect do |d|
    [ d.sub(/^\!/, ''), true ]
  end
].freeze

# Splits a host name into its parts. Capture 1 is the label directly left of
# the suffix plus the suffix, capture 2 is that label alone and capture 3 is
# the suffix. A leading "*." on a rule stands for one arbitrary label. The
# leading group is lazy, so the longest matching tail is preferred.
DOMAIN_PREFIX_SPEC = Regexp.new(
  '^(?:[^\.]+\.)*?(([^\.]+)\.(' + TLD_NAMES.collect do |d|
    Regexp.escape(d).sub(/^\\\*\\\./, '[^\.]+\.')
  end.join('|') + '))$'
).freeze

# Suffixes for which registered_domain always answers nil.
NONPUBLIC_TLD = {
  'local' => true
}.freeze
|
|
66
|
+
|
|
67
|
+
# Returns the canonical spelling of +domain+ used for all lookups, which is
# currently just its lower-cased form. A nil (or false) argument is handed
# back unchanged.
#
# FIX: Full implementation of http://www.ietf.org/rfc/rfc3492.txt required
def rfc3492_canonical_domain(domain)
  domain && domain.downcase
end
|
|
71
|
+
|
|
72
|
+
# Returns the registered name of +domain+ -- the label directly left of the
# matched suffix together with that suffix -- in canonical (lower-case) form.
#
# Returns nil when:
# * +domain+ does not match DOMAIN_PREFIX_SPEC (including nil input),
# * the matched suffix is listed in NONPUBLIC_TLD, or
# * the result is itself a bare suffix according to PREFIX_SPEC and is not
#   listed in ALLOWED_DOMAIN_PREFIXES.
def registered_domain(domain)
  match = DOMAIN_PREFIX_SPEC.match(rfc3492_canonical_domain(domain))
  return nil if match.nil?

  registered = match[1]
  return nil if NONPUBLIC_TLD[match[3]]

  bare_suffix = PREFIX_SPEC.match(registered) && !ALLOWED_DOMAIN_PREFIXES[registered]
  bare_suffix ? nil : registered
end
|
|
85
|
+
|
|
86
|
+
# Returns the suffix that DOMAIN_PREFIX_SPEC matched at the end of +domain+,
# in canonical (lower-case) form.
#
# Returns nil when +domain+ does not match at all (including nil input), or
# when the label-plus-suffix part is itself a bare suffix according to
# PREFIX_SPEC and is not listed in ALLOWED_DOMAIN_PREFIXES. Unlike
# registered_domain, NONPUBLIC_TLD is not consulted here.
def public_suffix(domain)
  match = DOMAIN_PREFIX_SPEC.match(rfc3492_canonical_domain(domain))
  return nil if match.nil?

  registered, suffix = match[1], match[3]
  bare_suffix = PREFIX_SPEC.match(registered) && !ALLOWED_DOMAIN_PREFIXES[registered]

  bare_suffix ? nil : suffix
end
|
|
98
|
+
|
|
99
|
+
# Returns the right-most label of the public suffix of +domain+, or nil when
# public_suffix yields nothing for it.
def tld(domain)
  suffix = public_suffix(rfc3492_canonical_domain(domain))
  return suffix unless suffix

  suffix.split(/\./).last
end
|
|
104
|
+
|
|
105
|
+
# Returns the single label directly left of the matched suffix of the given
# domain (capture 2 of DOMAIN_PREFIX_SPEC), in canonical lower-case form, or
# nil when the domain does not match (including nil input).
#
# Takes exactly one argument, the domain. The signature is variadic only so
# that a call with no arguments can be handed to an inherited #name: the
# enclosing module extends itself, which would otherwise replace Module#name
# and make the plain no-argument call on the module raise ArgumentError.
#
# Raises ArgumentError for any other argument count, as before.
def name(*args)
  # No argument and an inherited #name available: behave like that method.
  return super() if args.empty? && defined?(super)

  unless args.length == 1
    raise ArgumentError, "wrong number of arguments (#{args.length} for 1)"
  end

  m = DOMAIN_PREFIX_SPEC.match(rfc3492_canonical_domain(args.first))

  m && m[2]
end
|
|
110
|
+
|
|
111
|
+
extend self
|
|
112
|
+
end
|
data/test/helper.rb
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
require 'rubygems'
|
|
2
|
+
require 'test/unit'
|
|
3
|
+
|
|
4
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
|
5
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
|
6
|
+
|
|
7
|
+
require 'domain_prefix'
|
|
8
|
+
|
|
9
|
+
# Helpers shared by the test cases of this suite.
class Test::Unit::TestCase
  # Calls the given block once per key of +map+ and asserts that the collected
  # results equal +map+ (input => expected value). On failure the message
  # lists every input whose result differs, with expected and actual values.
  def assert_mapping(map)
    actual = { }
    map.each_key do |input|
      actual[input] = yield(input)
    end

    failed_inputs = actual.keys.select do |input|
      actual[input] != map[input]
    end

    report = failed_inputs.collect do |input|
      "Input: #{input.inspect}\n Expected: #{map[input].inspect}\n Result: #{actual[input].inspect}\n"
    end.join('')

    assert_equal(map, actual, report)
  end

  # Returns the contents of +file+ from the sample/ directory that sits next
  # to this helper.
  def sample_data(file)
    fixture_path = File.expand_path(File.join('sample', file), File.dirname(__FILE__))

    File.read(fixture_path)
  end
end
|
data/test/sample/README
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
Sample data pulled from http://publicsuffix.org/list/test.txt
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# Any copyright is dedicated to the Public Domain.
|
|
2
|
+
# http://creativecommons.org/publicdomain/zero/1.0/
|
|
3
|
+
|
|
4
|
+
# NULL input.
|
|
5
|
+
checkPublicSuffix(NULL, NULL);
|
|
6
|
+
# Mixed case.
|
|
7
|
+
checkPublicSuffix('COM', NULL);
|
|
8
|
+
checkPublicSuffix('example.COM', 'example.com');
|
|
9
|
+
checkPublicSuffix('WwW.example.COM', 'example.com');
|
|
10
|
+
# Leading dot.
|
|
11
|
+
checkPublicSuffix('.com', NULL);
|
|
12
|
+
checkPublicSuffix('.example', NULL);
|
|
13
|
+
checkPublicSuffix('.example.com', NULL);
|
|
14
|
+
checkPublicSuffix('.example.example', NULL);
|
|
15
|
+
# Unlisted TLD.
|
|
16
|
+
checkPublicSuffix('example', NULL);
|
|
17
|
+
checkPublicSuffix('example.example', NULL);
|
|
18
|
+
checkPublicSuffix('b.example.example', NULL);
|
|
19
|
+
checkPublicSuffix('a.b.example.example', NULL);
|
|
20
|
+
# Listed, but non-Internet, TLD.
|
|
21
|
+
checkPublicSuffix('local', NULL);
|
|
22
|
+
checkPublicSuffix('example.local', NULL);
|
|
23
|
+
checkPublicSuffix('b.example.local', NULL);
|
|
24
|
+
checkPublicSuffix('a.b.example.local', NULL);
|
|
25
|
+
# TLD with only 1 rule.
|
|
26
|
+
checkPublicSuffix('biz', NULL);
|
|
27
|
+
checkPublicSuffix('domain.biz', 'domain.biz');
|
|
28
|
+
checkPublicSuffix('b.domain.biz', 'domain.biz');
|
|
29
|
+
checkPublicSuffix('a.b.domain.biz', 'domain.biz');
|
|
30
|
+
# TLD with some 2-level rules.
|
|
31
|
+
checkPublicSuffix('com', NULL);
|
|
32
|
+
checkPublicSuffix('example.com', 'example.com');
|
|
33
|
+
checkPublicSuffix('b.example.com', 'example.com');
|
|
34
|
+
checkPublicSuffix('a.b.example.com', 'example.com');
|
|
35
|
+
checkPublicSuffix('uk.com', NULL);
|
|
36
|
+
checkPublicSuffix('example.uk.com', 'example.uk.com');
|
|
37
|
+
checkPublicSuffix('b.example.uk.com', 'example.uk.com');
|
|
38
|
+
checkPublicSuffix('a.b.example.uk.com', 'example.uk.com');
|
|
39
|
+
checkPublicSuffix('test.ac', 'test.ac');
|
|
40
|
+
# TLD with only 1 (wildcard) rule.
|
|
41
|
+
checkPublicSuffix('cy', NULL);
|
|
42
|
+
checkPublicSuffix('c.cy', NULL);
|
|
43
|
+
checkPublicSuffix('b.c.cy', 'b.c.cy');
|
|
44
|
+
checkPublicSuffix('a.b.c.cy', 'b.c.cy');
|
|
45
|
+
# More complex TLD.
|
|
46
|
+
checkPublicSuffix('jp', NULL);
|
|
47
|
+
checkPublicSuffix('test.jp', 'test.jp');
|
|
48
|
+
checkPublicSuffix('www.test.jp', 'test.jp');
|
|
49
|
+
checkPublicSuffix('ac.jp', NULL);
|
|
50
|
+
checkPublicSuffix('test.ac.jp', 'test.ac.jp');
|
|
51
|
+
checkPublicSuffix('www.test.ac.jp', 'test.ac.jp');
|
|
52
|
+
checkPublicSuffix('kyoto.jp', NULL);
|
|
53
|
+
checkPublicSuffix('c.kyoto.jp', NULL);
|
|
54
|
+
checkPublicSuffix('b.c.kyoto.jp', 'b.c.kyoto.jp');
|
|
55
|
+
checkPublicSuffix('a.b.c.kyoto.jp', 'b.c.kyoto.jp');
|
|
56
|
+
checkPublicSuffix('pref.kyoto.jp', 'pref.kyoto.jp'); # Exception rule.
|
|
57
|
+
checkPublicSuffix('www.pref.kyoto.jp', 'pref.kyoto.jp'); # Exception rule.
|
|
58
|
+
checkPublicSuffix('city.kyoto.jp', 'city.kyoto.jp'); # Exception rule.
|
|
59
|
+
checkPublicSuffix('www.city.kyoto.jp', 'city.kyoto.jp'); # Exception rule.
|
|
60
|
+
# TLD with a wildcard rule and exceptions.
|
|
61
|
+
checkPublicSuffix('om', NULL);
|
|
62
|
+
checkPublicSuffix('test.om', NULL);
|
|
63
|
+
checkPublicSuffix('b.test.om', 'b.test.om');
|
|
64
|
+
checkPublicSuffix('a.b.test.om', 'b.test.om');
|
|
65
|
+
checkPublicSuffix('songfest.om', 'songfest.om');
|
|
66
|
+
checkPublicSuffix('www.songfest.om', 'songfest.om');
|
|
67
|
+
# US K12.
|
|
68
|
+
checkPublicSuffix('us', NULL);
|
|
69
|
+
checkPublicSuffix('test.us', 'test.us');
|
|
70
|
+
checkPublicSuffix('www.test.us', 'test.us');
|
|
71
|
+
checkPublicSuffix('ak.us', NULL);
|
|
72
|
+
checkPublicSuffix('test.ak.us', 'test.ak.us');
|
|
73
|
+
checkPublicSuffix('www.test.ak.us', 'test.ak.us');
|
|
74
|
+
checkPublicSuffix('k12.ak.us', NULL);
|
|
75
|
+
checkPublicSuffix('test.k12.ak.us', 'test.k12.ak.us');
|
|
76
|
+
checkPublicSuffix('www.test.k12.ak.us', 'test.k12.ak.us');
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
require 'helper'
|
|
2
|
+
|
|
3
|
+
# Exercises DomainPrefix against hand-picked examples and against the sample
# checks stored in test/sample/test.txt.
class TestDomainPrefix < Test::Unit::TestCase
  def test_initialization
  end

  # Each input maps to the pair [ registered_domain, public_suffix ].
  def test_examples
    expectations = {
      'com' => [ nil, nil ],
      'example.com' => %w[ example.com com ],
      'uk.com' => [ nil, nil ],
      'example.uk.com' => %w[ example.uk.com uk.com ],
      'example.ca' => %w[ example.ca ca ],
      'example.on.ca' => %w[ example.on.ca on.ca ],
      'example.gc.ca' => %w[ example.gc.ca gc.ca ],
      'example.co.uk' => %w[ example.co.uk co.uk ],
      'example.au' => [ nil, nil ],
      'example.com.au' => %w[ example.com.au com.au ],
      'example.bar.jp' => %w[ bar.jp jp ],
      'example.bar.hokkaido.jp' => %w[ example.bar.hokkaido.jp bar.hokkaido.jp ],
      'example.metro.tokyo.jp' => %w[ example.metro.tokyo.jp metro.tokyo.jp ]
    }

    assert_mapping(expectations) do |domain|
      [ DomainPrefix.registered_domain(domain), DomainPrefix.public_suffix(domain) ]
    end
  end

  # Replays every checkPublicSuffix(input, expected) line of the sample file;
  # NULL stands for nil and quotes are stripped. Lines that are not such a
  # call yield no pair, so they are checked as nil -> nil.
  def test_public_suffix_samples
    checks = sample_data('test.txt').split(/\n/).collect do |line|
      if (line =~ /checkPublicSuffix\((\S+),\s*(\S+)\)/)
        [ $1, $2 ].collect do |part|
          part == 'NULL' ? nil : part.gsub(/'/, '')
        end
      end
    end

    checks.each do |domain, expected|
      assert_equal expected, DomainPrefix.registered_domain(domain), "#{domain.inspect} -> #{expected.inspect}"
    end
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: domain_prefix
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
prerelease:
|
|
6
|
+
platform: ruby
|
|
7
|
+
authors:
|
|
8
|
+
- tadman
|
|
9
|
+
autorequire:
|
|
10
|
+
bindir: bin
|
|
11
|
+
cert_chain: []
|
|
12
|
+
date: 2012-02-03 00:00:00.000000000 Z
|
|
13
|
+
dependencies: []
|
|
14
|
+
description: A library to extract information about top-level domain and registered
|
|
15
|
+
name from generic and international domain names
|
|
16
|
+
email: github@tadman.ca
|
|
17
|
+
executables: []
|
|
18
|
+
extensions: []
|
|
19
|
+
extra_rdoc_files:
|
|
20
|
+
- LICENSE
|
|
21
|
+
- README.rdoc
|
|
22
|
+
files:
|
|
23
|
+
- .document
|
|
24
|
+
- LICENSE
|
|
25
|
+
- README.rdoc
|
|
26
|
+
- Rakefile
|
|
27
|
+
- VERSION
|
|
28
|
+
- data/effective_tld_names.dat
|
|
29
|
+
- domain_prefix.gemspec
|
|
30
|
+
- lib/domain_prefix.rb
|
|
31
|
+
- test/helper.rb
|
|
32
|
+
- test/sample/README
|
|
33
|
+
- test/sample/test.txt
|
|
34
|
+
- test/test_tldifier.rb
|
|
35
|
+
homepage: http://github.com/twg/domain_prefix
|
|
36
|
+
licenses: []
|
|
37
|
+
post_install_message:
|
|
38
|
+
rdoc_options: []
|
|
39
|
+
require_paths:
|
|
40
|
+
- lib
|
|
41
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
42
|
+
none: false
|
|
43
|
+
requirements:
|
|
44
|
+
- - ! '>='
|
|
45
|
+
- !ruby/object:Gem::Version
|
|
46
|
+
version: '0'
|
|
47
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
48
|
+
none: false
|
|
49
|
+
requirements:
|
|
50
|
+
- - ! '>='
|
|
51
|
+
- !ruby/object:Gem::Version
|
|
52
|
+
version: '0'
|
|
53
|
+
requirements: []
|
|
54
|
+
rubyforge_project:
|
|
55
|
+
rubygems_version: 1.8.11
|
|
56
|
+
signing_key:
|
|
57
|
+
specification_version: 3
|
|
58
|
+
summary: Domain Prefix Extraction Library
|
|
59
|
+
test_files: []
|