domain_prefix 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,47 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
# Gem specification for domain_prefix 0.1.0. No runtime or development
# dependencies are declared.
Gem::Specification.new do |spec|
  spec.name = "domain_prefix"
  spec.version = "0.1.0"

  # Only assign the RubyGems requirement when this RubyGems exposes the writer.
  if spec.respond_to?(:required_rubygems_version=)
    spec.required_rubygems_version = Gem::Requirement.new(">= 0")
  end

  spec.authors = ["tadman"]
  spec.date = "2012-02-03"
  spec.description = "A library to extract information about top-level domain and registered name from generic and international domain names"
  spec.email = "github@tadman.ca"

  spec.extra_rdoc_files = %w[
    LICENSE
    README.rdoc
  ]

  spec.files = %w[
    .document
    LICENSE
    README.rdoc
    Rakefile
    VERSION
    data/effective_tld_names.dat
    domain_prefix.gemspec
    lib/domain_prefix.rb
    test/helper.rb
    test/sample/README
    test/sample/test.txt
    test/test_tldifier.rb
  ]

  spec.homepage = "http://github.com/twg/domain_prefix"
  spec.require_paths = ["lib"]
  spec.rubygems_version = "1.8.11"
  spec.summary = "Domain Prefix Extraction Library"

  # There are no dependencies to register, so the only version-sensitive
  # step is recording the specification format itself.
  spec.specification_version = 3 if spec.respond_to?(:specification_version)
end
47
+
@@ -0,0 +1,112 @@
1
+ # encoding: UTF-8
2
+
3
+ module DomainPrefix
4
# Hash whose missing keys are filled in on first access with a fresh, empty
# TreeHash, so nested paths can be created simply by indexing into them.
class TreeHash < Hash
  def initialize
    super { |tree, key| tree[key] = TreeHash.new }
  end

  # Follows the dot-separated components of +domain+ from left to right,
  # starting at this hash, and returns whatever is stored at the end of the
  # path. Returns nil when a component is absent. Lookups go through key?
  # first so that probing never creates new entries.
  def find_domain(domain)
    node = self

    domain.split('.').each do |component|
      # Once the walk has fallen off the tree the node stays as it is.
      node &&= node.key?(component) ? node[component] : nil
    end

    node
  end
end
17
+
18
# Location of the suffix rule list: data/effective_tld_names.dat, one
# directory above the directory containing this file.
TLDIFIER_SOURCE_FILE = File.expand_path(
  File.join('..', 'data', 'effective_tld_names.dat'),
  File.dirname(__FILE__)
)

# Every rule from the list file, with "//" comments and trailing whitespace
# stripped and lines that end up empty discarded.
TLD_SET = File.read(TLDIFIER_SOURCE_FILE).split(/\n/).map { |raw_line|
  raw_line.sub(%r[//.*], '').sub(/\s+$/, '')
}.reject { |rule| rule.empty? }.freeze

# The same rules ordered longest first (ties broken alphabetically), so the
# regular expression alternations below try longer rules before shorter ones.
TLD_NAMES = TLD_SET.sort_by { |rule| [ -rule.length, rule ] }.freeze

# Rules arranged as a tree keyed by label, walking each rule from its
# right-most label inward. A "*" label adds no level, and a label containing
# "!" is stored under the text that follows the "!".
# NOTE(review): none of the methods visible in this file read TLD_TREE.
TLD_TREE = TLD_NAMES.each_with_object(TreeHash.new) do |rule, tree|
  node = tree

  rule.split('.').reverse.each do |label|
    node =
      if label == '*'
        node
      elsif label =~ /!(.*)/
        node[$1]
      else
        node[label]
      end
  end
end.freeze

# Matches a string that is exactly one of the rules, with any leading "*."
# removed from wildcard rules.
# NOTE(review): "^" and "$" are line anchors in Ruby, not string anchors.
PREFIX_SPEC = Regexp.new(
  [
    '^(',
    TLD_NAMES.map { |rule| Regexp.escape(rule).sub(/^\\\*\\\./, '') }.join('|'),
    ')$'
  ].join
).freeze

# Rules that begin with "!", keyed by the rule text without the "!".
ALLOWED_DOMAIN_PREFIXES = TLD_NAMES.each_with_object({ }) do |rule, allowed|
  allowed[rule.sub(/^\!/, '')] = true if rule.match(/^\!/)
end.freeze

# Splits a host name into captures: 1 = one label plus the matched rule,
# 2 = that label alone, 3 = the matched rule. A leading "*." in a rule is
# replaced by a pattern that accepts any single label.
DOMAIN_PREFIX_SPEC = Regexp.new(
  [
    '^(?:[^\.]+\.)*?(([^\.]+)\.(',
    TLD_NAMES.map { |rule| Regexp.escape(rule).sub(/^\\\*\\\./, '[^\.]+\.') }.join('|'),
    '))$'
  ].join
).freeze

# Suffixes for which registered_domain never reports a registered domain.
NONPUBLIC_TLD = {
  'local' => true
}.freeze
66
+
67
# Normalises +domain+ before it is matched against the rule expressions.
# At present this only lower-cases the input; a nil (or false) argument is
# handed back unchanged.
def rfc3492_canonical_domain(domain)
  # FIX: Full implementation of http://www.ietf.org/rfc/rfc3492.txt required
  return domain unless domain

  domain.downcase
end
71
+
72
# Returns the registrable part of +domain+ (one label plus the matched
# suffix rule), or nil when:
# * the canonicalised input does not match DOMAIN_PREFIX_SPEC,
# * the matched suffix is listed in NONPUBLIC_TLD, or
# * the candidate is itself a suffix rule that is not whitelisted in
#   ALLOWED_DOMAIN_PREFIXES.
def registered_domain(domain)
  match = DOMAIN_PREFIX_SPEC.match(rfc3492_canonical_domain(domain))
  return nil if match.nil?

  candidate, suffix = match[1], match[3]
  return nil if NONPUBLIC_TLD[suffix]

  bare_suffix = PREFIX_SPEC.match(candidate) && !ALLOWED_DOMAIN_PREFIXES[candidate]
  bare_suffix ? nil : candidate
end
85
+
86
# Returns the suffix rule that +domain+ falls under, or nil when the
# canonicalised input does not match DOMAIN_PREFIX_SPEC or when the matched
# candidate is itself a suffix rule that is not whitelisted in
# ALLOWED_DOMAIN_PREFIXES. Unlike registered_domain, NONPUBLIC_TLD is not
# consulted here.
def public_suffix(domain)
  match = DOMAIN_PREFIX_SPEC.match(rfc3492_canonical_domain(domain))
  return nil if match.nil?

  candidate, suffix = match[1], match[3]

  bare_suffix = PREFIX_SPEC.match(candidate) && !ALLOWED_DOMAIN_PREFIXES[candidate]
  bare_suffix ? nil : suffix
end
98
+
99
# Returns the right-most label of the public suffix of +domain+, or nil
# when public_suffix yields nothing for it.
def tld(domain)
  suffix = public_suffix(rfc3492_canonical_domain(domain))
  return suffix unless suffix

  suffix.split(/\./).last
end
104
+
105
# Returns the single label that sits immediately in front of the matched
# suffix rule in +domain+, or nil when the canonicalised input does not
# match DOMAIN_PREFIX_SPEC.
def name(domain)
  match = DOMAIN_PREFIX_SPEC.match(rfc3492_canonical_domain(domain))

  match ? match[2] : nil
end
110
+
111
+ extend self
112
+ end
@@ -0,0 +1,35 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+
4
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
5
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
6
+
7
+ require 'domain_prefix'
8
+
9
# Shared helpers made available to every test case.
class Test::Unit::TestCase
  # Yields each key of +map+ to the block, collects the block results under
  # the same keys, and asserts that the collected hash equals +map+. The
  # failure message lists every input whose result differed from what was
  # expected.
  def assert_mapping(map)
    result_map = { }
    map.each { |input, _expected| result_map[input] = yield(input) }

    mismatched = result_map.keys.select do |input|
      result_map[input] != map[input]
    end

    report = mismatched.collect do |input|
      "Input: #{input.inspect}\n Expected: #{map[input].inspect}\n Result: #{result_map[input].inspect}\n"
    end.join('')

    assert_equal(map, result_map, report)
  end

  # Returns the contents of +file+ from the "sample" directory that sits
  # next to this helper.
  def sample_data(file)
    sample_path = File.expand_path(File.join('sample', file), File.dirname(__FILE__))

    File.read(sample_path)
  end
end
@@ -0,0 +1 @@
1
+ Sample data pulled from http://publicsuffix.org/list/test.txt
@@ -0,0 +1,76 @@
1
+ # Any copyright is dedicated to the Public Domain.
2
+ # http://creativecommons.org/publicdomain/zero/1.0/
3
+
4
+ # NULL input.
5
+ checkPublicSuffix(NULL, NULL);
6
+ # Mixed case.
7
+ checkPublicSuffix('COM', NULL);
8
+ checkPublicSuffix('example.COM', 'example.com');
9
+ checkPublicSuffix('WwW.example.COM', 'example.com');
10
+ # Leading dot.
11
+ checkPublicSuffix('.com', NULL);
12
+ checkPublicSuffix('.example', NULL);
13
+ checkPublicSuffix('.example.com', NULL);
14
+ checkPublicSuffix('.example.example', NULL);
15
+ # Unlisted TLD.
16
+ checkPublicSuffix('example', NULL);
17
+ checkPublicSuffix('example.example', NULL);
18
+ checkPublicSuffix('b.example.example', NULL);
19
+ checkPublicSuffix('a.b.example.example', NULL);
20
+ # Listed, but non-Internet, TLD.
21
+ checkPublicSuffix('local', NULL);
22
+ checkPublicSuffix('example.local', NULL);
23
+ checkPublicSuffix('b.example.local', NULL);
24
+ checkPublicSuffix('a.b.example.local', NULL);
25
+ # TLD with only 1 rule.
26
+ checkPublicSuffix('biz', NULL);
27
+ checkPublicSuffix('domain.biz', 'domain.biz');
28
+ checkPublicSuffix('b.domain.biz', 'domain.biz');
29
+ checkPublicSuffix('a.b.domain.biz', 'domain.biz');
30
+ # TLD with some 2-level rules.
31
+ checkPublicSuffix('com', NULL);
32
+ checkPublicSuffix('example.com', 'example.com');
33
+ checkPublicSuffix('b.example.com', 'example.com');
34
+ checkPublicSuffix('a.b.example.com', 'example.com');
35
+ checkPublicSuffix('uk.com', NULL);
36
+ checkPublicSuffix('example.uk.com', 'example.uk.com');
37
+ checkPublicSuffix('b.example.uk.com', 'example.uk.com');
38
+ checkPublicSuffix('a.b.example.uk.com', 'example.uk.com');
39
+ checkPublicSuffix('test.ac', 'test.ac');
40
+ # TLD with only 1 (wildcard) rule.
41
+ checkPublicSuffix('cy', NULL);
42
+ checkPublicSuffix('c.cy', NULL);
43
+ checkPublicSuffix('b.c.cy', 'b.c.cy');
44
+ checkPublicSuffix('a.b.c.cy', 'b.c.cy');
45
+ # More complex TLD.
46
+ checkPublicSuffix('jp', NULL);
47
+ checkPublicSuffix('test.jp', 'test.jp');
48
+ checkPublicSuffix('www.test.jp', 'test.jp');
49
+ checkPublicSuffix('ac.jp', NULL);
50
+ checkPublicSuffix('test.ac.jp', 'test.ac.jp');
51
+ checkPublicSuffix('www.test.ac.jp', 'test.ac.jp');
52
+ checkPublicSuffix('kyoto.jp', NULL);
53
+ checkPublicSuffix('c.kyoto.jp', NULL);
54
+ checkPublicSuffix('b.c.kyoto.jp', 'b.c.kyoto.jp');
55
+ checkPublicSuffix('a.b.c.kyoto.jp', 'b.c.kyoto.jp');
56
+ checkPublicSuffix('pref.kyoto.jp', 'pref.kyoto.jp'); # Exception rule.
57
+ checkPublicSuffix('www.pref.kyoto.jp', 'pref.kyoto.jp'); # Exception rule.
58
+ checkPublicSuffix('city.kyoto.jp', 'city.kyoto.jp'); # Exception rule.
59
+ checkPublicSuffix('www.city.kyoto.jp', 'city.kyoto.jp'); # Exception rule.
60
+ # TLD with a wildcard rule and exceptions.
61
+ checkPublicSuffix('om', NULL);
62
+ checkPublicSuffix('test.om', NULL);
63
+ checkPublicSuffix('b.test.om', 'b.test.om');
64
+ checkPublicSuffix('a.b.test.om', 'b.test.om');
65
+ checkPublicSuffix('songfest.om', 'songfest.om');
66
+ checkPublicSuffix('www.songfest.om', 'songfest.om');
67
+ # US K12.
68
+ checkPublicSuffix('us', NULL);
69
+ checkPublicSuffix('test.us', 'test.us');
70
+ checkPublicSuffix('www.test.us', 'test.us');
71
+ checkPublicSuffix('ak.us', NULL);
72
+ checkPublicSuffix('test.ak.us', 'test.ak.us');
73
+ checkPublicSuffix('www.test.ak.us', 'test.ak.us');
74
+ checkPublicSuffix('k12.ak.us', NULL);
75
+ checkPublicSuffix('test.k12.ak.us', 'test.k12.ak.us');
76
+ checkPublicSuffix('www.test.k12.ak.us', 'test.k12.ak.us');
@@ -0,0 +1,49 @@
1
+ require 'helper'
2
+
3
# Exercises DomainPrefix against hand-picked examples and against the
# checkPublicSuffix sample file kept under test/sample.
class TestDomainPrefix < Test::Unit::TestCase
  # Intentionally empty.
  def test_initialization
  end

  # Each input maps to [ registered_domain, public_suffix ].
  def test_examples
    expectations = {
      'com' => [ nil, nil ],
      'example.com' => %w[ example.com com ],
      'uk.com' => [ nil, nil ],
      'example.uk.com' => %w[ example.uk.com uk.com ],
      'example.ca' => %w[ example.ca ca ],
      'example.on.ca' => %w[ example.on.ca on.ca ],
      'example.gc.ca' => %w[ example.gc.ca gc.ca ],
      'example.co.uk' => %w[ example.co.uk co.uk ],
      'example.au' => [ nil, nil ],
      'example.com.au' => %w[ example.com.au com.au ],
      'example.bar.jp' => %w[ bar.jp jp ],
      'example.bar.hokkaido.jp' => %w[ example.bar.hokkaido.jp bar.hokkaido.jp ],
      'example.metro.tokyo.jp' => %w[ example.metro.tokyo.jp metro.tokyo.jp ]
    }

    assert_mapping(expectations) do |domain|
      [ DomainPrefix.registered_domain(domain), DomainPrefix.public_suffix(domain) ]
    end
  end

  # Parses every "checkPublicSuffix(input, expected)" line of the sample
  # file and compares registered_domain(input) with the expected value.
  # Lines that do not contain such a call become a nil entry, which is
  # checked as registered_domain(nil) == nil.
  def test_public_suffix_samples
    cases = sample_data('test.txt').split(/\n/).map do |line|
      next nil unless line =~ /checkPublicSuffix\((\S+),\s*(\S+)\)/

      # NULL stands for nil; anything else is a single-quoted string.
      [ $1, $2 ].map do |token|
        token == 'NULL' ? nil : token.gsub(/'/, '')
      end
    end

    cases.each do |domain, expected|
      assert_equal expected, DomainPrefix.registered_domain(domain), "#{domain.inspect} -> #{expected.inspect}"
    end
  end
end
metadata ADDED
@@ -0,0 +1,59 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: domain_prefix
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - tadman
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-02-03 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: A library to extract information about top-level domain and registered
15
+ name from generic and international domain names
16
+ email: github@tadman.ca
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files:
20
+ - LICENSE
21
+ - README.rdoc
22
+ files:
23
+ - .document
24
+ - LICENSE
25
+ - README.rdoc
26
+ - Rakefile
27
+ - VERSION
28
+ - data/effective_tld_names.dat
29
+ - domain_prefix.gemspec
30
+ - lib/domain_prefix.rb
31
+ - test/helper.rb
32
+ - test/sample/README
33
+ - test/sample/test.txt
34
+ - test/test_tldifier.rb
35
+ homepage: http://github.com/twg/domain_prefix
36
+ licenses: []
37
+ post_install_message:
38
+ rdoc_options: []
39
+ require_paths:
40
+ - lib
41
+ required_ruby_version: !ruby/object:Gem::Requirement
42
+ none: false
43
+ requirements:
44
+ - - ! '>='
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ required_rubygems_version: !ruby/object:Gem::Requirement
48
+ none: false
49
+ requirements:
50
+ - - ! '>='
51
+ - !ruby/object:Gem::Version
52
+ version: '0'
53
+ requirements: []
54
+ rubyforge_project:
55
+ rubygems_version: 1.8.11
56
+ signing_key:
57
+ specification_version: 3
58
+ summary: Domain Prefix Extraction Library
59
+ test_files: []